30 #define DEBUG_TYPE "si-memory-legalizer"
31 #define PASS_NAME "SI Memory Legalizer"
35 cl::desc(
"Use this to skip inserting cache invalidating instructions."));
57 enum class SIAtomicScope {
68 enum class SIAtomicAddrSpace {
88 class SIMemOpInfo final {
91 friend class SIMemOpAccess;
95 SIAtomicScope
Scope = SIAtomicScope::SYSTEM;
98 bool IsCrossAddressSpaceOrdering =
false;
100 bool IsNonTemporal =
false;
103 SIAtomicScope
Scope = SIAtomicScope::SYSTEM,
104 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
106 bool IsCrossAddressSpaceOrdering =
true,
110 bool IsNonTemporal =
false)
111 : Ordering(Ordering), FailureOrdering(FailureOrdering),
112 Scope(
Scope), OrderingAddrSpace(OrderingAddrSpace),
113 InstrAddrSpace(InstrAddrSpace),
114 IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
116 IsNonTemporal(IsNonTemporal) {
121 !IsCrossAddressSpaceOrdering &&
127 (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
129 (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
135 if ((OrderingAddrSpace == InstrAddrSpace) &&
137 this->IsCrossAddressSpaceOrdering =
false;
141 if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
144 }
else if ((InstrAddrSpace &
148 }
else if ((InstrAddrSpace &
171 return FailureOrdering;
176 SIAtomicAddrSpace getInstrAddrSpace()
const {
177 return InstrAddrSpace;
182 SIAtomicAddrSpace getOrderingAddrSpace()
const {
183 return OrderingAddrSpace;
188 bool getIsCrossAddressSpaceOrdering()
const {
189 return IsCrossAddressSpaceOrdering;
194 bool isVolatile()
const {
200 bool isNonTemporal()
const {
201 return IsNonTemporal;
206 bool isAtomic()
const {
212 class SIMemOpAccess final {
218 const char *
Msg)
const;
225 toSIAtomicScope(
SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace)
const;
228 SIAtomicAddrSpace toSIAtomicAddrSpace(
unsigned AS)
const;
259 class SICacheControl {
283 static std::unique_ptr<SICacheControl> create(
const GCNSubtarget &
ST);
290 SIAtomicAddrSpace AddrSpace)
const = 0;
297 SIAtomicAddrSpace AddrSpace)
const = 0;
304 SIAtomicAddrSpace AddrSpace)
const = 0;
310 SIAtomicAddrSpace AddrSpace,
312 bool IsNonTemporal)
const = 0;
323 SIAtomicAddrSpace AddrSpace,
325 bool IsCrossAddrSpaceOrdering,
326 Position Pos)
const = 0;
335 SIAtomicAddrSpace AddrSpace,
336 Position Pos)
const = 0;
346 SIAtomicAddrSpace AddrSpace,
347 bool IsCrossAddrSpaceOrdering,
348 Position Pos)
const = 0;
351 virtual ~SICacheControl() =
default;
355 class SIGfx6CacheControl :
public SICacheControl {
376 SIAtomicAddrSpace AddrSpace)
const override;
380 SIAtomicAddrSpace AddrSpace)
const override;
384 SIAtomicAddrSpace AddrSpace)
const override;
387 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
389 bool IsNonTemporal)
const override;
393 SIAtomicAddrSpace AddrSpace,
395 bool IsCrossAddrSpaceOrdering,
396 Position Pos)
const override;
400 SIAtomicAddrSpace AddrSpace,
401 Position Pos)
const override;
405 SIAtomicAddrSpace AddrSpace,
406 bool IsCrossAddrSpaceOrdering,
407 Position Pos)
const override;
410 class SIGfx7CacheControl :
public SIGfx6CacheControl {
417 SIAtomicAddrSpace AddrSpace,
418 Position Pos)
const override;
422 class SIGfx90ACacheControl :
public SIGfx7CacheControl {
429 SIAtomicAddrSpace AddrSpace)
const override;
433 SIAtomicAddrSpace AddrSpace)
const override;
437 SIAtomicAddrSpace AddrSpace)
const override;
440 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
442 bool IsNonTemporal)
const override;
446 SIAtomicAddrSpace AddrSpace,
448 bool IsCrossAddrSpaceOrdering,
449 Position Pos)
const override;
453 SIAtomicAddrSpace AddrSpace,
454 Position Pos)
const override;
458 SIAtomicAddrSpace AddrSpace,
459 bool IsCrossAddrSpaceOrdering,
460 Position Pos)
const override;
463 class SIGfx940CacheControl :
public SIGfx90ACacheControl {
490 SIAtomicAddrSpace AddrSpace)
const override;
494 SIAtomicAddrSpace AddrSpace)
const override;
498 SIAtomicAddrSpace AddrSpace)
const override;
501 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
503 bool IsNonTemporal)
const override;
506 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
509 SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
510 Position Pos)
const override;
513 class SIGfx10CacheControl :
public SIGfx7CacheControl {
528 SIAtomicAddrSpace AddrSpace)
const override;
531 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
533 bool IsNonTemporal)
const override;
537 SIAtomicAddrSpace AddrSpace,
539 bool IsCrossAddrSpaceOrdering,
540 Position Pos)
const override;
544 SIAtomicAddrSpace AddrSpace,
545 Position Pos)
const override;
548 class SIGfx11CacheControl :
public SIGfx10CacheControl {
554 SIAtomicAddrSpace AddrSpace)
const override;
557 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
559 bool IsNonTemporal)
const override;
566 std::unique_ptr<SICacheControl> CC =
nullptr;
569 std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
579 bool removeAtomicPseudoMIs();
583 bool expandLoad(
const SIMemOpInfo &MOI,
587 bool expandStore(
const SIMemOpInfo &MOI,
591 bool expandAtomicFence(
const SIMemOpInfo &MOI,
595 bool expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
618 const char *
Msg)
const {
619 const Function &
Func =
MI->getParent()->getParent()->getFunction();
621 Func.getContext().diagnose(Diag);
626 SIAtomicAddrSpace InstrAddrSpace)
const {
628 return std::make_tuple(SIAtomicScope::SYSTEM,
629 SIAtomicAddrSpace::ATOMIC,
631 if (SSID == MMI->getAgentSSID())
632 return std::make_tuple(SIAtomicScope::AGENT,
633 SIAtomicAddrSpace::ATOMIC,
635 if (SSID == MMI->getWorkgroupSSID())
636 return std::make_tuple(SIAtomicScope::WORKGROUP,
637 SIAtomicAddrSpace::ATOMIC,
639 if (SSID == MMI->getWavefrontSSID())
640 return std::make_tuple(SIAtomicScope::WAVEFRONT,
641 SIAtomicAddrSpace::ATOMIC,
644 return std::make_tuple(SIAtomicScope::SINGLETHREAD,
645 SIAtomicAddrSpace::ATOMIC,
647 if (SSID == MMI->getSystemOneAddressSpaceSSID())
648 return std::make_tuple(SIAtomicScope::SYSTEM,
649 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
651 if (SSID == MMI->getAgentOneAddressSpaceSSID())
652 return std::make_tuple(SIAtomicScope::AGENT,
653 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
655 if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
656 return std::make_tuple(SIAtomicScope::WORKGROUP,
657 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
659 if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
660 return std::make_tuple(SIAtomicScope::WAVEFRONT,
661 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
663 if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
664 return std::make_tuple(SIAtomicScope::SINGLETHREAD,
665 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
670 SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(
unsigned AS)
const {
678 return SIAtomicAddrSpace::SCRATCH;
680 return SIAtomicAddrSpace::GDS;
682 return SIAtomicAddrSpace::OTHER;
691 assert(
MI->getNumMemOperands() > 0);
697 bool IsNonTemporal =
true;
702 for (
const auto &MMO :
MI->memoperands()) {
703 IsNonTemporal &= MMO->isNonTemporal();
706 toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
709 const auto &IsSyncScopeInclusion =
710 MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
711 if (!IsSyncScopeInclusion) {
712 reportUnsupported(
MI,
713 "Unsupported non-inclusive atomic synchronization scope");
717 SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
728 bool IsCrossAddressSpaceOrdering =
false;
730 auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
732 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
735 std::tie(
Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
738 ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
740 reportUnsupported(
MI,
"Unsupported atomic address space");
744 return SIMemOpInfo(Ordering,
Scope, OrderingAddrSpace, InstrAddrSpace,
745 IsCrossAddressSpaceOrdering, FailureOrdering,
IsVolatile,
753 if (!(
MI->mayLoad() && !
MI->mayStore()))
757 if (
MI->getNumMemOperands() == 0)
758 return SIMemOpInfo();
760 return constructFromMIWithMMO(
MI);
767 if (!(!
MI->mayLoad() &&
MI->mayStore()))
771 if (
MI->getNumMemOperands() == 0)
772 return SIMemOpInfo();
774 return constructFromMIWithMMO(
MI);
788 auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
790 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
796 bool IsCrossAddressSpaceOrdering =
false;
797 std::tie(
Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
801 ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
802 reportUnsupported(
MI,
"Unsupported atomic address space");
806 return SIMemOpInfo(Ordering,
Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
814 if (!(
MI->mayLoad() &&
MI->mayStore()))
818 if (
MI->getNumMemOperands() == 0)
819 return SIMemOpInfo();
821 return constructFromMIWithMMO(
MI);
825 TII =
ST.getInstrInfo();
841 std::unique_ptr<SICacheControl> SICacheControl::create(
const GCNSubtarget &
ST) {
842 GCNSubtarget::Generation Generation =
ST.getGeneration();
843 if (
ST.hasGFX940Insts())
844 return std::make_unique<SIGfx940CacheControl>(
ST);
845 if (
ST.hasGFX90AInsts())
846 return std::make_unique<SIGfx90ACacheControl>(
ST);
847 if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
848 return std::make_unique<SIGfx6CacheControl>(
ST);
850 return std::make_unique<SIGfx7CacheControl>(
ST);
852 return std::make_unique<SIGfx10CacheControl>(
ST);
853 return std::make_unique<SIGfx11CacheControl>(
ST);
856 bool SIGfx6CacheControl::enableLoadCacheBypass(
859 SIAtomicAddrSpace AddrSpace)
const {
861 bool Changed =
false;
865 case SIAtomicScope::SYSTEM:
866 case SIAtomicScope::AGENT:
869 Changed |= enableGLCBit(
MI);
871 case SIAtomicScope::WORKGROUP:
872 case SIAtomicScope::WAVEFRONT:
873 case SIAtomicScope::SINGLETHREAD:
891 bool SIGfx6CacheControl::enableStoreCacheBypass(
894 SIAtomicAddrSpace AddrSpace)
const {
896 bool Changed =
false;
904 bool SIGfx6CacheControl::enableRMWCacheBypass(
907 SIAtomicAddrSpace AddrSpace)
const {
909 bool Changed =
false;
919 bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
933 bool Changed =
false;
940 Changed |= enableGLCBit(
MI);
947 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
956 Changed |= enableGLCBit(
MI);
957 Changed |= enableSLCBit(
MI);
966 SIAtomicAddrSpace AddrSpace,
968 bool IsCrossAddrSpaceOrdering,
969 Position Pos)
const {
970 bool Changed =
false;
975 if (Pos == Position::AFTER)
979 bool LGKMCnt =
false;
984 case SIAtomicScope::SYSTEM:
985 case SIAtomicScope::AGENT:
988 case SIAtomicScope::WORKGROUP:
989 case SIAtomicScope::WAVEFRONT:
990 case SIAtomicScope::SINGLETHREAD:
1001 case SIAtomicScope::SYSTEM:
1002 case SIAtomicScope::AGENT:
1003 case SIAtomicScope::WORKGROUP:
1010 LGKMCnt |= IsCrossAddrSpaceOrdering;
1012 case SIAtomicScope::WAVEFRONT:
1013 case SIAtomicScope::SINGLETHREAD:
1024 case SIAtomicScope::SYSTEM:
1025 case SIAtomicScope::AGENT:
1032 LGKMCnt |= IsCrossAddrSpaceOrdering;
1034 case SIAtomicScope::WORKGROUP:
1035 case SIAtomicScope::WAVEFRONT:
1036 case SIAtomicScope::SINGLETHREAD:
1045 if (VMCnt || LGKMCnt) {
1046 unsigned WaitCntImmediate =
1055 if (Pos == Position::AFTER)
1062 SIAtomicScope
Scope,
1063 SIAtomicAddrSpace AddrSpace,
1064 Position Pos)
const {
1065 if (!InsertCacheInv)
1068 bool Changed =
false;
1073 if (Pos == Position::AFTER)
1078 case SIAtomicScope::SYSTEM:
1079 case SIAtomicScope::AGENT:
1083 case SIAtomicScope::WORKGROUP:
1084 case SIAtomicScope::WAVEFRONT:
1085 case SIAtomicScope::SINGLETHREAD:
1100 if (Pos == Position::AFTER)
1107 SIAtomicScope
Scope,
1108 SIAtomicAddrSpace AddrSpace,
1109 bool IsCrossAddrSpaceOrdering,
1110 Position Pos)
const {
1112 IsCrossAddrSpaceOrdering, Pos);
1116 SIAtomicScope
Scope,
1117 SIAtomicAddrSpace AddrSpace,
1118 Position Pos)
const {
1119 if (!InsertCacheInv)
1122 bool Changed =
false;
1130 ? AMDGPU::BUFFER_WBINVL1
1131 : AMDGPU::BUFFER_WBINVL1_VOL;
1133 if (Pos == Position::AFTER)
1138 case SIAtomicScope::SYSTEM:
1139 case SIAtomicScope::AGENT:
1143 case SIAtomicScope::WORKGROUP:
1144 case SIAtomicScope::WAVEFRONT:
1145 case SIAtomicScope::SINGLETHREAD:
1160 if (Pos == Position::AFTER)
1166 bool SIGfx90ACacheControl::enableLoadCacheBypass(
1168 SIAtomicScope
Scope,
1169 SIAtomicAddrSpace AddrSpace)
const {
1171 bool Changed =
false;
1175 case SIAtomicScope::SYSTEM:
1176 case SIAtomicScope::AGENT:
1179 Changed |= enableGLCBit(
MI);
1181 case SIAtomicScope::WORKGROUP:
1186 if (
ST.isTgSplitEnabled())
1187 Changed |= enableGLCBit(
MI);
1189 case SIAtomicScope::WAVEFRONT:
1190 case SIAtomicScope::SINGLETHREAD:
1208 bool SIGfx90ACacheControl::enableStoreCacheBypass(
1210 SIAtomicScope
Scope,
1211 SIAtomicAddrSpace AddrSpace)
const {
1213 bool Changed =
false;
1217 case SIAtomicScope::SYSTEM:
1218 case SIAtomicScope::AGENT:
1222 case SIAtomicScope::WORKGROUP:
1223 case SIAtomicScope::WAVEFRONT:
1224 case SIAtomicScope::SINGLETHREAD:
1243 bool SIGfx90ACacheControl::enableRMWCacheBypass(
1245 SIAtomicScope
Scope,
1246 SIAtomicAddrSpace AddrSpace)
const {
1248 bool Changed =
false;
1252 case SIAtomicScope::SYSTEM:
1253 case SIAtomicScope::AGENT:
1258 case SIAtomicScope::WORKGROUP:
1259 case SIAtomicScope::WAVEFRONT:
1260 case SIAtomicScope::SINGLETHREAD:
1271 bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
1285 bool Changed =
false;
1292 Changed |= enableGLCBit(
MI);
1299 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1305 if (IsNonTemporal) {
1308 Changed |= enableGLCBit(
MI);
1309 Changed |= enableSLCBit(
MI);
1317 SIAtomicScope
Scope,
1318 SIAtomicAddrSpace AddrSpace,
1320 bool IsCrossAddrSpaceOrdering,
1321 Position Pos)
const {
1322 if (
ST.isTgSplitEnabled()) {
1332 (
Scope == SIAtomicScope::WORKGROUP)) {
1334 Scope = SIAtomicScope::AGENT;
1338 AddrSpace &= ~SIAtomicAddrSpace
::LDS;
1340 return SIGfx7CacheControl::insertWait(
MI,
Scope, AddrSpace,
Op,
1341 IsCrossAddrSpaceOrdering, Pos);
1345 SIAtomicScope
Scope,
1346 SIAtomicAddrSpace AddrSpace,
1347 Position Pos)
const {
1348 if (!InsertCacheInv)
1351 bool Changed =
false;
1356 if (Pos == Position::AFTER)
1361 case SIAtomicScope::SYSTEM:
1373 case SIAtomicScope::AGENT:
1376 case SIAtomicScope::WORKGROUP:
1381 if (
ST.isTgSplitEnabled()) {
1383 Scope = SIAtomicScope::AGENT;
1386 case SIAtomicScope::WAVEFRONT:
1387 case SIAtomicScope::SINGLETHREAD:
1402 if (Pos == Position::AFTER)
1405 Changed |= SIGfx7CacheControl::insertAcquire(
MI,
Scope, AddrSpace, Pos);
1411 SIAtomicScope
Scope,
1412 SIAtomicAddrSpace AddrSpace,
1413 bool IsCrossAddrSpaceOrdering,
1414 Position Pos)
const {
1415 bool Changed =
false;
1420 if (Pos == Position::AFTER)
1425 case SIAtomicScope::SYSTEM:
1439 case SIAtomicScope::AGENT:
1440 case SIAtomicScope::WORKGROUP:
1441 case SIAtomicScope::WAVEFRONT:
1442 case SIAtomicScope::SINGLETHREAD:
1450 if (Pos == Position::AFTER)
1454 SIGfx7CacheControl::insertRelease(
MI,
Scope, AddrSpace,
1455 IsCrossAddrSpaceOrdering, Pos);
1460 bool SIGfx940CacheControl::enableLoadCacheBypass(
1462 SIAtomicAddrSpace AddrSpace)
const {
1464 bool Changed =
false;
1468 case SIAtomicScope::SYSTEM:
1470 Changed |= enableSC0Bit(
MI);
1471 Changed |= enableSC1Bit(
MI);
1473 case SIAtomicScope::AGENT:
1475 Changed |= enableSC1Bit(
MI);
1477 case SIAtomicScope::WORKGROUP:
1483 Changed |= enableSC0Bit(
MI);
1485 case SIAtomicScope::WAVEFRONT:
1486 case SIAtomicScope::SINGLETHREAD:
1504 bool SIGfx940CacheControl::enableStoreCacheBypass(
1506 SIAtomicScope
Scope, SIAtomicAddrSpace AddrSpace)
const {
1508 bool Changed =
false;
1512 case SIAtomicScope::SYSTEM:
1514 Changed |= enableSC0Bit(
MI);
1515 Changed |= enableSC1Bit(
MI);
1517 case SIAtomicScope::AGENT:
1519 Changed |= enableSC1Bit(
MI);
1521 case SIAtomicScope::WORKGROUP:
1523 Changed |= enableSC0Bit(
MI);
1525 case SIAtomicScope::WAVEFRONT:
1526 case SIAtomicScope::SINGLETHREAD:
1544 bool SIGfx940CacheControl::enableRMWCacheBypass(
1546 SIAtomicAddrSpace AddrSpace)
const {
1548 bool Changed =
false;
1552 case SIAtomicScope::SYSTEM:
1554 Changed |= enableSC1Bit(
MI);
1556 case SIAtomicScope::AGENT:
1557 case SIAtomicScope::WORKGROUP:
1558 case SIAtomicScope::WAVEFRONT:
1559 case SIAtomicScope::SINGLETHREAD:
1573 bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
1587 bool Changed =
false;
1591 Changed |= enableSC0Bit(
MI);
1592 Changed |= enableSC1Bit(
MI);
1599 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1605 if (IsNonTemporal) {
1606 Changed |= enableNTBit(
MI);
1614 SIAtomicScope
Scope,
1615 SIAtomicAddrSpace AddrSpace,
1616 Position Pos)
const {
1617 if (!InsertCacheInv)
1620 bool Changed =
false;
1625 if (Pos == Position::AFTER)
1630 case SIAtomicScope::SYSTEM:
1644 case SIAtomicScope::AGENT:
1657 case SIAtomicScope::WORKGROUP:
1662 if (
ST.isTgSplitEnabled()) {
1676 case SIAtomicScope::WAVEFRONT:
1677 case SIAtomicScope::SINGLETHREAD:
1693 if (Pos == Position::AFTER)
1700 SIAtomicScope
Scope,
1701 SIAtomicAddrSpace AddrSpace,
1702 bool IsCrossAddrSpaceOrdering,
1703 Position Pos)
const {
1704 bool Changed =
false;
1709 if (Pos == Position::AFTER)
1714 case SIAtomicScope::SYSTEM:
1729 case SIAtomicScope::AGENT:
1739 case SIAtomicScope::WORKGROUP:
1740 case SIAtomicScope::WAVEFRONT:
1741 case SIAtomicScope::SINGLETHREAD:
1751 if (Pos == Position::AFTER)
1757 IsCrossAddrSpaceOrdering, Pos);
1762 bool SIGfx10CacheControl::enableLoadCacheBypass(
1764 SIAtomicScope
Scope,
1765 SIAtomicAddrSpace AddrSpace)
const {
1767 bool Changed =
false;
1771 case SIAtomicScope::SYSTEM:
1772 case SIAtomicScope::AGENT:
1775 Changed |= enableGLCBit(
MI);
1776 Changed |= enableDLCBit(
MI);
1778 case SIAtomicScope::WORKGROUP:
1783 if (!
ST.isCuModeEnabled())
1784 Changed |= enableGLCBit(
MI);
1786 case SIAtomicScope::WAVEFRONT:
1787 case SIAtomicScope::SINGLETHREAD:
1805 bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
1820 bool Changed =
false;
1827 Changed |= enableGLCBit(
MI);
1828 Changed |= enableDLCBit(
MI);
1836 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1841 if (IsNonTemporal) {
1847 Changed |= enableGLCBit(
MI);
1848 Changed |= enableSLCBit(
MI);
1857 SIAtomicScope
Scope,
1858 SIAtomicAddrSpace AddrSpace,
1860 bool IsCrossAddrSpaceOrdering,
1861 Position Pos)
const {
1862 bool Changed =
false;
1867 if (Pos == Position::AFTER)
1872 bool LGKMCnt =
false;
1877 case SIAtomicScope::SYSTEM:
1878 case SIAtomicScope::AGENT:
1884 case SIAtomicScope::WORKGROUP:
1890 if (!
ST.isCuModeEnabled()) {
1897 case SIAtomicScope::WAVEFRONT:
1898 case SIAtomicScope::SINGLETHREAD:
1909 case SIAtomicScope::SYSTEM:
1910 case SIAtomicScope::AGENT:
1911 case SIAtomicScope::WORKGROUP:
1918 LGKMCnt |= IsCrossAddrSpaceOrdering;
1920 case SIAtomicScope::WAVEFRONT:
1921 case SIAtomicScope::SINGLETHREAD:
1932 case SIAtomicScope::SYSTEM:
1933 case SIAtomicScope::AGENT:
1940 LGKMCnt |= IsCrossAddrSpaceOrdering;
1942 case SIAtomicScope::WORKGROUP:
1943 case SIAtomicScope::WAVEFRONT:
1944 case SIAtomicScope::SINGLETHREAD:
1953 if (VMCnt || LGKMCnt) {
1954 unsigned WaitCntImmediate =
1970 if (Pos == Position::AFTER)
1977 SIAtomicScope
Scope,
1978 SIAtomicAddrSpace AddrSpace,
1979 Position Pos)
const {
1980 if (!InsertCacheInv)
1983 bool Changed =
false;
1988 if (Pos == Position::AFTER)
1993 case SIAtomicScope::SYSTEM:
1994 case SIAtomicScope::AGENT:
1999 case SIAtomicScope::WORKGROUP:
2004 if (!
ST.isCuModeEnabled()) {
2009 case SIAtomicScope::WAVEFRONT:
2010 case SIAtomicScope::SINGLETHREAD:
2025 if (Pos == Position::AFTER)
2031 bool SIGfx11CacheControl::enableLoadCacheBypass(
2033 SIAtomicAddrSpace AddrSpace)
const {
2035 bool Changed =
false;
2039 case SIAtomicScope::SYSTEM:
2040 case SIAtomicScope::AGENT:
2043 Changed |= enableGLCBit(
MI);
2045 case SIAtomicScope::WORKGROUP:
2050 if (!
ST.isCuModeEnabled())
2051 Changed |= enableGLCBit(
MI);
2053 case SIAtomicScope::WAVEFRONT:
2054 case SIAtomicScope::SINGLETHREAD:
2072 bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
2087 bool Changed =
false;
2094 Changed |= enableGLCBit(
MI);
2097 Changed |= enableDLCBit(
MI);
2104 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
2109 if (IsNonTemporal) {
2115 Changed |= enableGLCBit(
MI);
2116 Changed |= enableSLCBit(
MI);
2119 Changed |= enableDLCBit(
MI);
2126 bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
2127 if (AtomicPseudoMIs.empty())
2130 for (
auto &
MI : AtomicPseudoMIs)
2131 MI->eraseFromParent();
2133 AtomicPseudoMIs.clear();
2137 bool SIMemoryLegalizer::expandLoad(
const SIMemOpInfo &MOI,
2141 bool Changed =
false;
2143 if (MOI.isAtomic()) {
2144 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2145 MOI.getOrdering() == AtomicOrdering::Acquire ||
2146 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2147 Changed |= CC->enableLoadCacheBypass(
MI, MOI.getScope(),
2148 MOI.getOrderingAddrSpace());
2151 if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2152 Changed |= CC->insertWait(
MI, MOI.getScope(),
2153 MOI.getOrderingAddrSpace(),
2155 MOI.getIsCrossAddressSpaceOrdering(),
2158 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2159 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2160 Changed |= CC->insertWait(
MI, MOI.getScope(),
2161 MOI.getInstrAddrSpace(),
2163 MOI.getIsCrossAddressSpaceOrdering(),
2165 Changed |= CC->insertAcquire(
MI, MOI.getScope(),
2166 MOI.getOrderingAddrSpace(),
2176 Changed |= CC->enableVolatileAndOrNonTemporal(
MI, MOI.getInstrAddrSpace(),
2178 MOI.isNonTemporal());
2182 bool SIMemoryLegalizer::expandStore(
const SIMemOpInfo &MOI,
2186 bool Changed =
false;
2188 if (MOI.isAtomic()) {
2189 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2190 MOI.getOrdering() == AtomicOrdering::Release ||
2191 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2192 Changed |= CC->enableStoreCacheBypass(
MI, MOI.getScope(),
2193 MOI.getOrderingAddrSpace());
2196 if (MOI.getOrdering() == AtomicOrdering::Release ||
2197 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2198 Changed |= CC->insertRelease(
MI, MOI.getScope(),
2199 MOI.getOrderingAddrSpace(),
2200 MOI.getIsCrossAddressSpaceOrdering(),
2209 Changed |= CC->enableVolatileAndOrNonTemporal(
2211 MOI.isNonTemporal());
2215 bool SIMemoryLegalizer::expandAtomicFence(
const SIMemOpInfo &MOI,
2219 AtomicPseudoMIs.push_back(
MI);
2220 bool Changed =
false;
2222 if (MOI.isAtomic()) {
2223 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2224 MOI.getOrdering() == AtomicOrdering::Release ||
2225 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2226 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2234 Changed |= CC->insertRelease(
MI, MOI.getScope(),
2235 MOI.getOrderingAddrSpace(),
2236 MOI.getIsCrossAddressSpaceOrdering(),
2244 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2245 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2246 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2247 Changed |= CC->insertAcquire(
MI, MOI.getScope(),
2248 MOI.getOrderingAddrSpace(),
2257 bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
2261 bool Changed =
false;
2263 if (MOI.isAtomic()) {
2264 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2265 MOI.getOrdering() == AtomicOrdering::Acquire ||
2266 MOI.getOrdering() == AtomicOrdering::Release ||
2267 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2268 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2269 Changed |= CC->enableRMWCacheBypass(
MI, MOI.getScope(),
2270 MOI.getInstrAddrSpace());
2273 if (MOI.getOrdering() == AtomicOrdering::Release ||
2274 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2275 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
2276 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
2277 Changed |= CC->insertRelease(
MI, MOI.getScope(),
2278 MOI.getOrderingAddrSpace(),
2279 MOI.getIsCrossAddressSpaceOrdering(),
2282 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2283 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2284 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
2285 MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
2286 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
2287 Changed |= CC->insertWait(
MI, MOI.getScope(),
2288 MOI.getInstrAddrSpace(),
2291 MOI.getIsCrossAddressSpaceOrdering(),
2293 Changed |= CC->insertAcquire(
MI, MOI.getScope(),
2294 MOI.getOrderingAddrSpace(),
2305 bool Changed =
false;
2307 SIMemOpAccess MOA(MF);
2310 for (
auto &
MBB : MF) {
2314 if (
MI->isBundle() &&
MI->mayLoadOrStore()) {
2317 I !=
E &&
I->isBundledWithPred(); ++
I) {
2318 I->unbundleFromPred();
2321 MO.setIsInternalRead(
false);
2324 MI->eraseFromParent();
2325 MI = II->getIterator();
2331 if (
const auto &MOI = MOA.getLoadInfo(
MI))
2332 Changed |= expandLoad(MOI.getValue(),
MI);
2333 else if (
const auto &MOI = MOA.getStoreInfo(
MI))
2334 Changed |= expandStore(MOI.getValue(),
MI);
2335 else if (
const auto &MOI = MOA.getAtomicFenceInfo(
MI))
2336 Changed |= expandAtomicFence(MOI.getValue(),
MI);
2337 else if (
const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(
MI))
2338 Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(),
MI);
2342 Changed |= removeAtomicPseudoMIs();
2348 char SIMemoryLegalizer::
ID = 0;
2352 return new SIMemoryLegalizer();