1 //===- SIMemoryLegalizer.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Memory legalizer - implements memory model. More information can be
11 /// found here:
12 /// http://llvm.org/docs/AMDGPUUsage.html#memory-model
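///
/// As a summary of the code below: the pass inserts the waits (S_WAITCNT) and
/// cache invalidations, and sets the cache bypass bits (glc/slc/dlc), required
/// by atomic operations, fences and non-temporal accesses at their
/// synchronization scope.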
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIDefines.h"
20 #include "SIInstrInfo.h"
22 #include "Utils/AMDGPUBaseInfo.h"
23 #include "llvm/ADT/BitmaskEnum.h"
24 #include "llvm/ADT/None.h"
25 #include "llvm/ADT/Optional.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/IR/DiagnosticInfo.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/LLVMContext.h"
37 #include "llvm/MC/MCInstrDesc.h"
38 #include "llvm/Pass.h"
41 #include <cassert>
42 #include <list>
43 
44 using namespace llvm;
45 using namespace llvm::AMDGPU;
46 
47 #define DEBUG_TYPE "si-memory-legalizer"
48 #define PASS_NAME "SI Memory Legalizer"
49 
50 namespace {
51 
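// Enable the bitwise operators provided by LLVM_MARK_AS_BITMASK_ENUM for the
// enums defined in this anonymous namespace (SIMemOp, SIAtomicAddrSpace).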
52 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
53 
54 /// Memory operation flags. Can be ORed together.
55 enum class SIMemOp {
56  NONE = 0u,
57  LOAD = 1u << 0,
58  STORE = 1u << 1,
59  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
60 };
61 
62 /// Position to insert a new instruction relative to an existing
63 /// instruction.
64 enum class Position {
65  BEFORE,
66  AFTER
67 };
68 
69 /// The atomic synchronization scopes supported by the AMDGPU target.
70 enum class SIAtomicScope {
71  NONE,
72  SINGLETHREAD,
73  WAVEFRONT,
74  WORKGROUP,
75  AGENT,
76  SYSTEM
77 };
78 
79 /// The distinct address spaces supported by the AMDGPU target for
80 /// atomic memory operations. Can be ORed together.
81 enum class SIAtomicAddrSpace {
82  NONE = 0u,
83  GLOBAL = 1u << 0,
84  LDS = 1u << 1,
85  SCRATCH = 1u << 2,
86  GDS = 1u << 3,
87  OTHER = 1u << 4,
88 
89  /// The address spaces that can be accessed by a FLAT instruction.
90  FLAT = GLOBAL | LDS | SCRATCH,
91 
92  /// The address spaces that support atomic instructions.
93  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,
94 
95  /// All address spaces.
96  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,
97 
98  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
99 };
100 
101 /// Sets named bit \p BitName to "true" if present in instruction \p MI.
102 /// \returns Returns true if \p MI is modified, false otherwise.
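/// For example, enableNamedBit<AMDGPU::OpName::glc>(MI) sets the glc operand
/// of \p MI to 1 when the opcode defines one (see the cache controls below).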
103 template <uint16_t BitName>
104 bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
105  int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
106  if (BitIdx == -1)
107  return false;
108 
109  MachineOperand &Bit = MI->getOperand(BitIdx);
110  if (Bit.getImm() != 0)
111  return false;
112 
113  Bit.setImm(1);
114  return true;
115 }
116 
117 class SIMemOpInfo final {
118 private:
119 
120  friend class SIMemOpAccess;
121 
122  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
123  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
124  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
125  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
126  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
127  bool IsCrossAddressSpaceOrdering = false;
128  bool IsNonTemporal = false;
129 
130  SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
131  SIAtomicScope Scope = SIAtomicScope::SYSTEM,
132  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
133  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
134  bool IsCrossAddressSpaceOrdering = true,
135  AtomicOrdering FailureOrdering =
136  AtomicOrdering::SequentiallyConsistent,
137  bool IsNonTemporal = false)
138  : Ordering(Ordering), FailureOrdering(FailureOrdering),
139  Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
140  InstrAddrSpace(InstrAddrSpace),
141  IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
142  IsNonTemporal(IsNonTemporal) {
143  // There is also no cross address space ordering if the ordering
144  // address space is the same as the instruction address space and
145  // only contains a single address space.
146  if ((OrderingAddrSpace == InstrAddrSpace) &&
147  isPowerOf2_32(uint32_t(InstrAddrSpace)))
148  this->IsCrossAddressSpaceOrdering = false;
149  }
150 
151 public:
152  /// \returns Atomic synchronization scope of the machine instruction used to
153  /// create this SIMemOpInfo.
154  SIAtomicScope getScope() const {
155  return Scope;
156  }
157 
158  /// \returns Ordering constraint of the machine instruction used to
159  /// create this SIMemOpInfo.
160  AtomicOrdering getOrdering() const {
161  return Ordering;
162  }
163 
164  /// \returns Failure ordering constraint of the machine instruction used to
165  /// create this SIMemOpInfo.
166  AtomicOrdering getFailureOrdering() const {
167  return FailureOrdering;
168  }
169 
170  /// \returns The address spaces accessed by the machine
171  /// instruction used to create this SIMemOpInfo.
172  SIAtomicAddrSpace getInstrAddrSpace() const {
173  return InstrAddrSpace;
174  }
175 
176  /// \returns The address spaces that must be ordered by the machine
177  /// instruction used to create this SIMemOpInfo.
178  SIAtomicAddrSpace getOrderingAddrSpace() const {
179  return OrderingAddrSpace;
180  }
181 
182  /// \returns True iff memory ordering of operations on
183  /// different address spaces is required.
184  bool getIsCrossAddressSpaceOrdering() const {
185  return IsCrossAddressSpaceOrdering;
186  }
187 
188  /// \returns True if memory access of the machine instruction used to
189  /// create this SIMemOpInfo is non-temporal, false otherwise.
190  bool isNonTemporal() const {
191  return IsNonTemporal;
192  }
193 
194  /// \returns True if ordering constraint of the machine instruction used to
195  /// create this SIMemOpInfo is unordered or higher, false otherwise.
196  bool isAtomic() const {
197  return Ordering != AtomicOrdering::NotAtomic;
198  }
199 
200 };
201 
202 class SIMemOpAccess final {
203 private:
204  AMDGPUMachineModuleInfo *MMI = nullptr;
205 
206  /// Reports unsupported message \p Msg for \p MI to LLVM context.
207  void reportUnsupported(const MachineBasicBlock::iterator &MI,
208  const char *Msg) const;
209 
210  /// Inspects the target synchronization scope \p SSID and determines
211  /// the SI atomic scope it corresponds to, the address spaces it
212  /// covers, and whether the memory ordering applies between address
213  /// spaces.
214  Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
215  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const;
216 
217  /// \return Return a bit set of the address spaces accessed by \p AS.
218  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;
219 
220  /// \returns Info constructed from \p MI, which has at least one machine memory
221  /// operand.
222  Optional<SIMemOpInfo> constructFromMIWithMMO(
223  const MachineBasicBlock::iterator &MI) const;
224 
225 public:
226  /// Construct class to support accessing the machine memory operands
227  /// of instructions in the machine function \p MF.
228  SIMemOpAccess(MachineFunction &MF);
229 
230  /// \returns Load info if \p MI is a load operation, "None" otherwise.
231  Optional<SIMemOpInfo> getLoadInfo(
232  const MachineBasicBlock::iterator &MI) const;
233 
234  /// \returns Store info if \p MI is a store operation, "None" otherwise.
235  Optional<SIMemOpInfo> getStoreInfo(
236  const MachineBasicBlock::iterator &MI) const;
237 
238  /// \returns Atomic fence info if \p MI is an atomic fence operation,
239  /// "None" otherwise.
240  Optional<SIMemOpInfo> getAtomicFenceInfo(
241  const MachineBasicBlock::iterator &MI) const;
242 
243  /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
244  /// rmw operation, "None" otherwise.
245  Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
246  const MachineBasicBlock::iterator &MI) const;
247 };
248 
249 class SICacheControl {
250 protected:
251 
252  /// Instruction info.
253  const SIInstrInfo *TII = nullptr;
254 
255  IsaVersion IV;
256 
257  /// Whether to insert cache invalidation instructions.
258  bool InsertCacheInv;
259 
260  SICacheControl(const GCNSubtarget &ST);
261 
262 public:
263 
264  /// Create a cache control for the subtarget \p ST.
265  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);
266 
267  /// Update \p MI memory load instruction to bypass any caches up to
268  /// the \p Scope memory scope for address spaces \p
269  /// AddrSpace. Return true iff the instruction was modified.
270  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
271  SIAtomicScope Scope,
272  SIAtomicAddrSpace AddrSpace) const = 0;
273 
274  /// Update \p MI memory instruction to indicate it is
275  /// nontemporal. Return true iff the instruction was modified.
276  virtual bool enableNonTemporal(const MachineBasicBlock::iterator &MI)
277  const = 0;
278 
279  /// Inserts any necessary instructions at position \p Pos relative
280  /// to instruction \p MI to ensure any caches associated with
281  /// address spaces \p AddrSpace for memory scopes up to memory scope
282  /// \p Scope are invalidated. Returns true iff any instructions
283  /// inserted.
284  virtual bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
285  SIAtomicScope Scope,
286  SIAtomicAddrSpace AddrSpace,
287  Position Pos) const = 0;
288 
289  /// Inserts any necessary instructions at position \p Pos relative
290  /// to instruction \p MI to ensure memory instructions of kind \p Op
291  /// associated with address spaces \p AddrSpace have completed as
292  /// observed by other memory instructions executing in memory scope
293  /// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory
294  /// ordering is between address spaces. Returns true iff any
295  /// instructions inserted.
296  virtual bool insertWait(MachineBasicBlock::iterator &MI,
297  SIAtomicScope Scope,
298  SIAtomicAddrSpace AddrSpace,
299  SIMemOp Op,
300  bool IsCrossAddrSpaceOrdering,
301  Position Pos) const = 0;
302 
303  /// Virtual destructor to allow derivations to be deleted.
304  virtual ~SICacheControl() = default;
305 
306 };
307 
308 class SIGfx6CacheControl : public SICacheControl {
309 protected:
310 
311  /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
312  /// is modified, false otherwise.
313  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
314  return enableNamedBit<AMDGPU::OpName::glc>(MI);
315  }
316 
317  /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
318  /// is modified, false otherwise.
319  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
320  return enableNamedBit<AMDGPU::OpName::slc>(MI);
321  }
322 
323 public:
324 
325  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {};
326 
327  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
328  SIAtomicScope Scope,
329  SIAtomicAddrSpace AddrSpace) const override;
330 
331  bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
332 
333  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
334  SIAtomicScope Scope,
335  SIAtomicAddrSpace AddrSpace,
336  Position Pos) const override;
337 
338  bool insertWait(MachineBasicBlock::iterator &MI,
339  SIAtomicScope Scope,
340  SIAtomicAddrSpace AddrSpace,
341  SIMemOp Op,
342  bool IsCrossAddrSpaceOrdering,
343  Position Pos) const override;
344 };
345 
346 class SIGfx7CacheControl : public SIGfx6CacheControl {
347 public:
348 
349  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {};
350 
351  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
352  SIAtomicScope Scope,
353  SIAtomicAddrSpace AddrSpace,
354  Position Pos) const override;
355 
356 };
357 
358 class SIGfx10CacheControl : public SIGfx7CacheControl {
359 protected:
360  bool CuMode = false;
361 
362  /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
363  /// is modified, false otherwise.
364  bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
365  return enableNamedBit<AMDGPU::OpName::dlc>(MI);
366  }
367 
368 public:
369 
370  SIGfx10CacheControl(const GCNSubtarget &ST, bool CuMode) :
371  SIGfx7CacheControl(ST), CuMode(CuMode) {};
372 
373  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
374  SIAtomicScope Scope,
375  SIAtomicAddrSpace AddrSpace) const override;
376 
377  bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
378 
379  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
380  SIAtomicScope Scope,
381  SIAtomicAddrSpace AddrSpace,
382  Position Pos) const override;
383 
384  bool insertWait(MachineBasicBlock::iterator &MI,
385  SIAtomicScope Scope,
386  SIAtomicAddrSpace AddrSpace,
387  SIMemOp Op,
388  bool IsCrossAddrSpaceOrdering,
389  Position Pos) const override;
390 };
391 
392 class SIMemoryLegalizer final : public MachineFunctionPass {
393 private:
394 
395  /// Cache Control.
396  std::unique_ptr<SICacheControl> CC = nullptr;
397 
398  /// List of atomic pseudo instructions.
399  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
400 
401  /// Return true iff instruction \p MI is an atomic instruction that
402  /// returns a result.
403  bool isAtomicRet(const MachineInstr &MI) const {
404  return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
405  }
406 
407  /// Removes all processed atomic pseudo instructions from the current
408  /// function. Returns true if current function is modified, false otherwise.
409  bool removeAtomicPseudoMIs();
410 
411  /// Expands load operation \p MI. Returns true if instructions are
412  /// added/deleted or \p MI is modified, false otherwise.
413  bool expandLoad(const SIMemOpInfo &MOI,
414  MachineBasicBlock::iterator &MI);
415  /// Expands store operation \p MI. Returns true if instructions are
416  /// added/deleted or \p MI is modified, false otherwise.
417  bool expandStore(const SIMemOpInfo &MOI,
418  MachineBasicBlock::iterator &MI);
419  /// Expands atomic fence operation \p MI. Returns true if
420  /// instructions are added/deleted or \p MI is modified, false otherwise.
421  bool expandAtomicFence(const SIMemOpInfo &MOI,
422  MachineBasicBlock::iterator &MI);
423  /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
424  /// instructions are added/deleted or \p MI is modified, false otherwise.
425  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
426  MachineBasicBlock::iterator &MI);
427 
428 public:
429  static char ID;
430 
431  SIMemoryLegalizer() : MachineFunctionPass(ID) {}
432 
433  void getAnalysisUsage(AnalysisUsage &AU) const override {
434  AU.setPreservesCFG();
435  MachineFunctionPass::getAnalysisUsage(AU);
436  }
437 
438  StringRef getPassName() const override {
439  return PASS_NAME;
440  }
441 
442  bool runOnMachineFunction(MachineFunction &MF) override;
443 };
444 
445 } // end namespace anonymous
446 
447 void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
448  const char *Msg) const {
449  const Function &Func = MI->getParent()->getParent()->getFunction();
450  DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
451  Func.getContext().diagnose(Diag);
452 }
453 
454 Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
455 SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
456  SIAtomicAddrSpace InstrScope) const {
457  if (SSID == SyncScope::System)
458  return std::make_tuple(SIAtomicScope::SYSTEM,
459  SIAtomicAddrSpace::ATOMIC,
460  true);
461  if (SSID == MMI->getAgentSSID())
462  return std::make_tuple(SIAtomicScope::AGENT,
463  SIAtomicAddrSpace::ATOMIC,
464  true);
465  if (SSID == MMI->getWorkgroupSSID())
466  return std::make_tuple(SIAtomicScope::WORKGROUP,
467  SIAtomicAddrSpace::ATOMIC,
468  true);
469  if (SSID == MMI->getWavefrontSSID())
470  return std::make_tuple(SIAtomicScope::WAVEFRONT,
471  SIAtomicAddrSpace::ATOMIC,
472  true);
473  if (SSID == SyncScope::SingleThread)
474  return std::make_tuple(SIAtomicScope::SINGLETHREAD,
475  SIAtomicAddrSpace::ATOMIC,
476  true);
477  if (SSID == MMI->getSystemOneAddressSpaceSSID())
478  return std::make_tuple(SIAtomicScope::SYSTEM,
479  SIAtomicAddrSpace::ATOMIC & InstrScope,
480  false);
481  if (SSID == MMI->getAgentOneAddressSpaceSSID())
482  return std::make_tuple(SIAtomicScope::AGENT,
483  SIAtomicAddrSpace::ATOMIC & InstrScope,
484  false);
485  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
486  return std::make_tuple(SIAtomicScope::WORKGROUP,
487  SIAtomicAddrSpace::ATOMIC & InstrScope,
488  false);
489  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
490  return std::make_tuple(SIAtomicScope::WAVEFRONT,
491  SIAtomicAddrSpace::ATOMIC & InstrScope,
492  false);
493  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
494  return std::make_tuple(SIAtomicScope::SINGLETHREAD,
495  SIAtomicAddrSpace::ATOMIC & InstrScope,
496  false);
497  return None;
498 }
499 
500 SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
501  if (AS == AMDGPUAS::FLAT_ADDRESS)
502  return SIAtomicAddrSpace::FLAT;
503  if (AS == AMDGPUAS::GLOBAL_ADDRESS)
504  return SIAtomicAddrSpace::GLOBAL;
505  if (AS == AMDGPUAS::LOCAL_ADDRESS)
506  return SIAtomicAddrSpace::LDS;
507  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
508  return SIAtomicAddrSpace::SCRATCH;
509  if (AS == AMDGPUAS::REGION_ADDRESS)
510  return SIAtomicAddrSpace::GDS;
511 
512  return SIAtomicAddrSpace::OTHER;
513 }
514 
515 SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
516  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
517 }
518 
519 Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
520  const MachineBasicBlock::iterator &MI) const {
521  assert(MI->getNumMemOperands() > 0);
522 
523  SyncScope::ID SSID = SyncScope::SingleThread;
524  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
525  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
526  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
527  bool IsNonTemporal = true;
528 
529  // Validator should check whether or not MMOs cover the entire set of
530  // locations accessed by the memory instruction.
531  for (const auto &MMO : MI->memoperands()) {
532  IsNonTemporal &= MMO->isNonTemporal();
533  InstrAddrSpace |=
534  toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
535  AtomicOrdering OpOrdering = MMO->getOrdering();
536  if (OpOrdering != AtomicOrdering::NotAtomic) {
537  const auto &IsSyncScopeInclusion =
538  MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
539  if (!IsSyncScopeInclusion) {
540  reportUnsupported(MI,
541  "Unsupported non-inclusive atomic synchronization scope");
542  return None;
543  }
544 
545  SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
546  Ordering =
547  isStrongerThan(Ordering, OpOrdering) ?
548  Ordering : MMO->getOrdering();
549  assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
550  MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
551  FailureOrdering =
552  isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
553  FailureOrdering : MMO->getFailureOrdering();
554  }
555  }
556 
557  SIAtomicScope Scope = SIAtomicScope::NONE;
558  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
559  bool IsCrossAddressSpaceOrdering = false;
560  if (Ordering != AtomicOrdering::NotAtomic) {
561  auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
562  if (!ScopeOrNone) {
563  reportUnsupported(MI, "Unsupported atomic synchronization scope");
564  return None;
565  }
566  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
567  ScopeOrNone.getValue();
568  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
569  ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
570  reportUnsupported(MI, "Unsupported atomic address space");
571  return None;
572  }
573  }
574  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
575  IsCrossAddressSpaceOrdering, FailureOrdering, IsNonTemporal);
576 }
577 
578 Optional<SIMemOpInfo> SIMemOpAccess::getLoadInfo(
579  const MachineBasicBlock::iterator &MI) const {
580  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
581 
582  if (!(MI->mayLoad() && !MI->mayStore()))
583  return None;
584 
585  // Be conservative if there are no memory operands.
586  if (MI->getNumMemOperands() == 0)
587  return SIMemOpInfo();
588 
589  return constructFromMIWithMMO(MI);
590 }
591 
592 Optional<SIMemOpInfo> SIMemOpAccess::getStoreInfo(
593  const MachineBasicBlock::iterator &MI) const {
594  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
595 
596  if (!(!MI->mayLoad() && MI->mayStore()))
597  return None;
598 
599  // Be conservative if there are no memory operands.
600  if (MI->getNumMemOperands() == 0)
601  return SIMemOpInfo();
602 
603  return constructFromMIWithMMO(MI);
604 }
605 
606 Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
607  const MachineBasicBlock::iterator &MI) const {
608  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
609 
610  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
611  return None;
612 
613  AtomicOrdering Ordering =
614  static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
615 
616  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
617  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
618  if (!ScopeOrNone) {
619  reportUnsupported(MI, "Unsupported atomic synchronization scope");
620  return None;
621  }
622 
623  SIAtomicScope Scope = SIAtomicScope::NONE;
624  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
625  bool IsCrossAddressSpaceOrdering = false;
626  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
627  ScopeOrNone.getValue();
628 
629  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
630  ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
631  reportUnsupported(MI, "Unsupported atomic address space");
632  return None;
633  }
634 
635  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
636  IsCrossAddressSpaceOrdering);
637 }
638 
639 Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
640  const MachineBasicBlock::iterator &MI) const {
641  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
642 
643  if (!(MI->mayLoad() && MI->mayStore()))
644  return None;
645 
646  // Be conservative if there are no memory operands.
647  if (MI->getNumMemOperands() == 0)
648  return SIMemOpInfo();
649 
650  return constructFromMIWithMMO(MI);
651 }
652 
653 SICacheControl::SICacheControl(const GCNSubtarget &ST) {
654  TII = ST.getInstrInfo();
655  IV = getIsaVersion(ST.getCPU());
656  InsertCacheInv = !ST.isAmdPalOS();
657 }
658 
659 /* static */
660 std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
661  GCNSubtarget::Generation Generation = ST.getGeneration();
662  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
663  return std::make_unique<SIGfx6CacheControl>(ST);
664  if (Generation < AMDGPUSubtarget::GFX10)
665  return std::make_unique<SIGfx7CacheControl>(ST);
666  return std::make_unique<SIGfx10CacheControl>(ST, ST.isCuModeEnabled());
667 }
668 
669 bool SIGfx6CacheControl::enableLoadCacheBypass(
670  const MachineBasicBlock::iterator &MI,
671  SIAtomicScope Scope,
672  SIAtomicAddrSpace AddrSpace) const {
673  assert(MI->mayLoad() && !MI->mayStore());
674  bool Changed = false;
675 
676  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
677  /// TODO: Do not set glc for rmw atomic operations as they
678  /// implicitly bypass the L1 cache.
679 
680  switch (Scope) {
681  case SIAtomicScope::SYSTEM:
682  case SIAtomicScope::AGENT:
683  Changed |= enableGLCBit(MI);
684  break;
685  case SIAtomicScope::WORKGROUP:
686  case SIAtomicScope::WAVEFRONT:
687  case SIAtomicScope::SINGLETHREAD:
688  // No cache to bypass.
689  break;
690  default:
691  llvm_unreachable("Unsupported synchronization scope");
692  }
693  }
694 
695  /// The scratch address space does not need the global memory caches
696  /// to be bypassed as all memory operations by the same thread are
697  /// sequentially consistent, and no other thread can access scratch
698  /// memory.
699 
700  /// Other address spaces do not have a cache.
701 
702  return Changed;
703 }
704 
705 bool SIGfx6CacheControl::enableNonTemporal(
706  const MachineBasicBlock::iterator &MI) const {
707  assert(MI->mayLoad() ^ MI->mayStore());
708  bool Changed = false;
709 
710  /// TODO: Do not enableGLCBit if rmw atomic.
711  Changed |= enableGLCBit(MI);
712  Changed |= enableSLCBit(MI);
713 
714  return Changed;
715 }
716 
717 bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
718  SIAtomicScope Scope,
719  SIAtomicAddrSpace AddrSpace,
720  Position Pos) const {
721  if (!InsertCacheInv)
722  return false;
723 
724  bool Changed = false;
725 
726  MachineBasicBlock &MBB = *MI->getParent();
727  DebugLoc DL = MI->getDebugLoc();
728 
729  if (Pos == Position::AFTER)
730  ++MI;
731 
732  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
733  switch (Scope) {
734  case SIAtomicScope::SYSTEM:
735  case SIAtomicScope::AGENT:
736  BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
737  Changed = true;
738  break;
739  case SIAtomicScope::WORKGROUP:
740  case SIAtomicScope::WAVEFRONT:
741  case SIAtomicScope::SINGLETHREAD:
742  // No cache to invalidate.
743  break;
744  default:
745  llvm_unreachable("Unsupported synchronization scope");
746  }
747  }
748 
749  /// The scratch address space does not need the global memory cache
750  /// to be flushed as all memory operations by the same thread are
751  /// sequentially consistent, and no other thread can access scratch
752  /// memory.
753 
754  /// Other address spaces do not have a cache.
755 
756  if (Pos == Position::AFTER)
757  --MI;
758 
759  return Changed;
760 }
761 
762 bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
763  SIAtomicScope Scope,
764  SIAtomicAddrSpace AddrSpace,
765  SIMemOp Op,
766  bool IsCrossAddrSpaceOrdering,
767  Position Pos) const {
768  bool Changed = false;
769 
770  MachineBasicBlock &MBB = *MI->getParent();
771  DebugLoc DL = MI->getDebugLoc();
772 
773  if (Pos == Position::AFTER)
774  ++MI;
775 
776  bool VMCnt = false;
777  bool LGKMCnt = false;
778 
779  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
780  switch (Scope) {
781  case SIAtomicScope::SYSTEM:
782  case SIAtomicScope::AGENT:
783  VMCnt |= true;
784  break;
785  case SIAtomicScope::WORKGROUP:
786  case SIAtomicScope::WAVEFRONT:
787  case SIAtomicScope::SINGLETHREAD:
788  // The L1 cache keeps all memory operations in order for
789  // wavefronts in the same work-group.
790  break;
791  default:
792  llvm_unreachable("Unsupported synchronization scope");
793  }
794  }
795 
796  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
797  switch (Scope) {
798  case SIAtomicScope::SYSTEM:
799  case SIAtomicScope::AGENT:
800  case SIAtomicScope::WORKGROUP:
801  // If no cross address space ordering then an LDS waitcnt is not
802  // needed as LDS operations for all waves are executed in a
803  // total global ordering as observed by all waves. Required if
804  // also synchronizing with global/GDS memory as LDS operations
805  // could be reordered with respect to later global/GDS memory
806  // operations of the same wave.
807  LGKMCnt |= IsCrossAddrSpaceOrdering;
808  break;
809  case SIAtomicScope::WAVEFRONT:
810  case SIAtomicScope::SINGLETHREAD:
811  // The LDS keeps all memory operations in order for
812  // the same wavefront.
813  break;
814  default:
815  llvm_unreachable("Unsupported synchronization scope");
816  }
817  }
818 
819  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
820  switch (Scope) {
821  case SIAtomicScope::SYSTEM:
822  case SIAtomicScope::AGENT:
823  // If no cross address space ordering then a GDS waitcnt is not
824  // needed as GDS operations for all waves are executed in a
825  // total global ordering as observed by all waves. Required if
826  // also synchronizing with global/LDS memory as GDS operations
827  // could be reordered with respect to later global/LDS memory
828  // operations of the same wave.
829  LGKMCnt |= IsCrossAddrSpaceOrdering;
830  break;
831  case SIAtomicScope::WORKGROUP:
832  case SIAtomicScope::WAVEFRONT:
833  case SIAtomicScope::SINGLETHREAD:
834  // The GDS keeps all memory operations in order for
835  // the same work-group.
836  break;
837  default:
838  llvm_unreachable("Unsupported synchronization scope");
839  }
840  }
841 
842  if (VMCnt || LGKMCnt) {
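  // S_WAITCNT waits until each counter is <= its encoded field, so counters
  // that must drain are encoded as 0 and counters to ignore are encoded as
  // their full bit mask.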
843  unsigned WaitCntImmediate =
844  AMDGPU::encodeWaitcnt(IV,
845  VMCnt ? 0 : getVmcntBitMask(IV),
846  getExpcntBitMask(IV),
847  LGKMCnt ? 0 : getLgkmcntBitMask(IV));
848  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
849  Changed = true;
850  }
851 
852  if (Pos == Position::AFTER)
853  --MI;
854 
855  return Changed;
856 }
857 
858 bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
859  SIAtomicScope Scope,
860  SIAtomicAddrSpace AddrSpace,
861  Position Pos) const {
862  if (!InsertCacheInv)
863  return false;
864 
865  bool Changed = false;
866 
867  MachineBasicBlock &MBB = *MI->getParent();
868  DebugLoc DL = MI->getDebugLoc();
869 
870  const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
871 
872  const unsigned Flush = STM.isAmdPalOS() || STM.isMesa3DOS()
873  ? AMDGPU::BUFFER_WBINVL1
874  : AMDGPU::BUFFER_WBINVL1_VOL;
875 
876  if (Pos == Position::AFTER)
877  ++MI;
878 
879  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
880  switch (Scope) {
881  case SIAtomicScope::SYSTEM:
882  case SIAtomicScope::AGENT:
883  BuildMI(MBB, MI, DL, TII->get(Flush));
884  Changed = true;
885  break;
886  case SIAtomicScope::WORKGROUP:
887  case SIAtomicScope::WAVEFRONT:
888  case SIAtomicScope::SINGLETHREAD:
889  // No cache to invalidate.
890  break;
891  default:
892  llvm_unreachable("Unsupported synchronization scope");
893  }
894  }
895 
896  /// The scratch address space does not need the global memory cache
897  /// to be flushed as all memory operations by the same thread are
898  /// sequentially consistent, and no other thread can access scratch
899  /// memory.
900 
901  /// Other address spaces do not have a cache.
902 
903  if (Pos == Position::AFTER)
904  --MI;
905 
906  return Changed;
907 }
908 
909 bool SIGfx10CacheControl::enableLoadCacheBypass(
910  const MachineBasicBlock::iterator &MI,
911  SIAtomicScope Scope,
912  SIAtomicAddrSpace AddrSpace) const {
913  assert(MI->mayLoad() && !MI->mayStore());
914  bool Changed = false;
915 
916  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
917  /// TODO Do not set glc for rmw atomic operations as they
918  /// implicitly bypass the L0/L1 caches.
919 
920  switch (Scope) {
921  case SIAtomicScope::SYSTEM:
922  case SIAtomicScope::AGENT:
923  Changed |= enableGLCBit(MI);
924  Changed |= enableDLCBit(MI);
925  break;
926  case SIAtomicScope::WORKGROUP:
927  // In WGP mode the waves of a work-group can be executing on either CU of
928  // the WGP, so the L0, which is per CU, must be bypassed. In CU mode all
929  // waves of a work-group are on the same CU, and so the L0 does not need
930  // to be bypassed.
931  if (!CuMode) Changed |= enableGLCBit(MI);
932  break;
933  case SIAtomicScope::WAVEFRONT:
934  case SIAtomicScope::SINGLETHREAD:
935  // No cache to bypass.
936  break;
937  default:
938  llvm_unreachable("Unsupported synchronization scope");
939  }
940  }
941 
942  /// The scratch address space does not need the global memory caches
943  /// to be bypassed as all memory operations by the same thread are
944  /// sequentially consistent, and no other thread can access scratch
945  /// memory.
946 
947  /// Other address spaces do not have a cache.
948 
949  return Changed;
950 }
951 
952 bool SIGfx10CacheControl::enableNonTemporal(
953  const MachineBasicBlock::iterator &MI) const {
954  assert(MI->mayLoad() ^ MI->mayStore());
955  bool Changed = false;
956 
957  Changed |= enableSLCBit(MI);
958  /// TODO for store (non-rmw atomic) instructions also enableGLCBit(MI)
959 
960  return Changed;
961 }
962 
963 bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
964  SIAtomicScope Scope,
965  SIAtomicAddrSpace AddrSpace,
966  Position Pos) const {
967  if (!InsertCacheInv)
968  return false;
969 
970  bool Changed = false;
971 
972  MachineBasicBlock &MBB = *MI->getParent();
973  DebugLoc DL = MI->getDebugLoc();
974 
975  if (Pos == Position::AFTER)
976  ++MI;
977 
978  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
979  switch (Scope) {
980  case SIAtomicScope::SYSTEM:
981  case SIAtomicScope::AGENT:
982  BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
983  BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
984  Changed = true;
985  break;
986  case SIAtomicScope::WORKGROUP:
987  // In WGP mode the waves of a work-group can be executing on either CU of
988  // the WGP, so the L0, which is per CU, must be invalidated. In CU mode
989  // all waves of a work-group are on the same CU, and so the L0 does not
990  // need to be invalidated.
991  if (!CuMode) {
992  BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
993  Changed = true;
994  }
995  break;
996  case SIAtomicScope::WAVEFRONT:
997  case SIAtomicScope::SINGLETHREAD:
998  // No cache to invalidate.
999  break;
1000  default:
1001  llvm_unreachable("Unsupported synchronization scope");
1002  }
1003  }
1004 
1005  /// The scratch address space does not need the global memory cache
1006  /// to be flushed as all memory operations by the same thread are
1007  /// sequentially consistent, and no other thread can access scratch
1008  /// memory.
1009 
1010  /// Other address spaces do not have a cache.
1011 
1012  if (Pos == Position::AFTER)
1013  --MI;
1014 
1015  return Changed;
1016 }
1017 
1018 bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
1019  SIAtomicScope Scope,
1020  SIAtomicAddrSpace AddrSpace,
1021  SIMemOp Op,
1022  bool IsCrossAddrSpaceOrdering,
1023  Position Pos) const {
1024  bool Changed = false;
1025 
1026  MachineBasicBlock &MBB = *MI->getParent();
1027  DebugLoc DL = MI->getDebugLoc();
1028 
1029  if (Pos == Position::AFTER)
1030  ++MI;
1031 
1032  bool VMCnt = false;
1033  bool VSCnt = false;
1034  bool LGKMCnt = false;
1035 
1036  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1037  switch (Scope) {
1038  case SIAtomicScope::SYSTEM:
1039  case SIAtomicScope::AGENT:
1040  if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
1041  VMCnt |= true;
1042  if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
1043  VSCnt |= true;
1044  break;
1045  case SIAtomicScope::WORKGROUP:
1046  // In WGP mode the waves of a work-group can be executing on either CU of
1047  // the WGP, so waits are needed to ensure operations are visible to waves
1048  // on the other CU, as the L0 is per CU. In CU mode all waves of a
1049  // work-group are on the same CU and share the same L0, so no wait is
1050  // needed.
1051  if (!CuMode) {
1052  if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
1053  VMCnt |= true;
1054  if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
1055  VSCnt |= true;
1056  }
1057  break;
1058  case SIAtomicScope::WAVEFRONT:
1059  case SIAtomicScope::SINGLETHREAD:
1060  // The L0 cache keeps all memory operations in order for
1061  // work-items in the same wavefront.
1062  break;
1063  default:
1064  llvm_unreachable("Unsupported synchronization scope");
1065  }
1066  }
1067 
1068  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1069  switch (Scope) {
1070  case SIAtomicScope::SYSTEM:
1071  case SIAtomicScope::AGENT:
1072  case SIAtomicScope::WORKGROUP:
1073  // If no cross address space ordering then an LDS waitcnt is not
1074  // needed as LDS operations for all waves are executed in a
1075  // total global ordering as observed by all waves. Required if
1076  // also synchronizing with global/GDS memory as LDS operations
1077  // could be reordered with respect to later global/GDS memory
1078  // operations of the same wave.
1079  LGKMCnt |= IsCrossAddrSpaceOrdering;
1080  break;
1081  case SIAtomicScope::WAVEFRONT:
1082  case SIAtomicScope::SINGLETHREAD:
1083  // The LDS keeps all memory operations in order for
1084  // the same wavefront.
1085  break;
1086  default:
1087  llvm_unreachable("Unsupported synchronization scope");
1088  }
1089  }
1090 
1091  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1092  switch (Scope) {
1093  case SIAtomicScope::SYSTEM:
1094  case SIAtomicScope::AGENT:
1095  // If no cross address space ordering then a GDS waitcnt is not
1096  // needed as GDS operations for all waves are executed in a
1097  // total global ordering as observed by all waves. Required if
1098  // also synchronizing with global/LDS memory as GDS operations
1099  // could be reordered with respect to later global/LDS memory
1100  // operations of the same wave.
1101  LGKMCnt |= IsCrossAddrSpaceOrdering;
1102  break;
1103  case SIAtomicScope::WORKGROUP:
1104  case SIAtomicScope::WAVEFRONT:
1105  case SIAtomicScope::SINGLETHREAD:
1106  // The GDS keeps all memory operations in order for
1107  // the same work-group.
1108  break;
1109  default:
1110  llvm_unreachable("Unsupported synchronization scope");
1111  }
1112  }
1113 
1114  if (VMCnt || LGKMCnt) {
1115  unsigned WaitCntImmediate =
1116  AMDGPU::encodeWaitcnt(IV,
1117  VMCnt ? 0 : getVmcntBitMask(IV),
1118  getExpcntBitMask(IV),
1119  LGKMCnt ? 0 : getLgkmcntBitMask(IV));
1120  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
1121  Changed = true;
1122  }
1123 
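  // GFX10 tracks VMEM stores with a separate vscnt counter that has its own
  // wait instruction; waiting for it to reach 0 ensures all issued stores
  // have completed.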
1124  if (VSCnt) {
1125  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
1126  .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
1127  .addImm(0);
1128  Changed = true;
1129  }
1130 
1131  if (Pos == Position::AFTER)
1132  --MI;
1133 
1134  return Changed;
1135 }
1136 
1137 bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
1138  if (AtomicPseudoMIs.empty())
1139  return false;
1140 
1141  for (auto &MI : AtomicPseudoMIs)
1142  MI->eraseFromParent();
1143 
1144  AtomicPseudoMIs.clear();
1145  return true;
1146 }
1147 
1148 bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
1149  MachineBasicBlock::iterator &MI) {
1150  assert(MI->mayLoad() && !MI->mayStore());
1151 
1152  bool Changed = false;
1153 
1154  if (MOI.isAtomic()) {
1155  if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
1156  MOI.getOrdering() == AtomicOrdering::Acquire ||
1157  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
1158  Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
1159  MOI.getOrderingAddrSpace());
1160  }
1161 
1162  if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
1163  Changed |= CC->insertWait(MI, MOI.getScope(),
1164  MOI.getOrderingAddrSpace(),
1165  SIMemOp::LOAD | SIMemOp::STORE,
1166  MOI.getIsCrossAddressSpaceOrdering(),
1167  Position::BEFORE);
1168 
1169  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
1170  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
1171  Changed |= CC->insertWait(MI, MOI.getScope(),
1172  MOI.getInstrAddrSpace(),
1173  SIMemOp::LOAD,
1174  MOI.getIsCrossAddressSpaceOrdering(),
1175  Position::AFTER);
1176  Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
1177  MOI.getOrderingAddrSpace(),
1178  Position::AFTER);
1179  }
1180 
1181  return Changed;
1182  }
1183 
1184  // Atomic instructions do not have the nontemporal attribute.
1185  if (MOI.isNonTemporal()) {
1186  Changed |= CC->enableNonTemporal(MI);
1187  return Changed;
1188  }
1189 
1190  return Changed;
1191 }
1192 
1193 bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
1194  MachineBasicBlock::iterator &MI) {
1195  assert(!MI->mayLoad() && MI->mayStore());
1196 
1197  bool Changed = false;
1198 
1199  if (MOI.isAtomic()) {
1200  if (MOI.getOrdering() == AtomicOrdering::Release ||
1201  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
1202  Changed |= CC->insertWait(MI, MOI.getScope(),
1203  MOI.getOrderingAddrSpace(),
1204  SIMemOp::LOAD | SIMemOp::STORE,
1205  MOI.getIsCrossAddressSpaceOrdering(),
1206  Position::BEFORE);
1207 
1208  return Changed;
1209  }
1210 
1211  // Atomic instructions do not have the nontemporal attribute.
1212  if (MOI.isNonTemporal()) {
1213  Changed |= CC->enableNonTemporal(MI);
1214  return Changed;
1215  }
1216 
1217  return Changed;
1218 }
1219 
1220 bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
1221  MachineBasicBlock::iterator &MI) {
1222  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
1223 
1224  AtomicPseudoMIs.push_back(MI);
1225  bool Changed = false;
1226 
1227  if (MOI.isAtomic()) {
1228  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
1229  MOI.getOrdering() == AtomicOrdering::Release ||
1230  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
1231  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
1232  /// TODO: This relies on a barrier always generating a waitcnt
1233  /// for LDS to ensure it is not reordered with the completion of
1234  /// the preceding LDS operations. If the barrier had a memory
1235  /// ordering and memory scope, then the library would not need to
1236  /// generate a fence. Could add support in this file for
1237  /// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
1238  /// adding a waitcnt before S_BARRIER.
1239  Changed |= CC->insertWait(MI, MOI.getScope(),
1240  MOI.getOrderingAddrSpace(),
1241  SIMemOp::LOAD | SIMemOp::STORE,
1242  MOI.getIsCrossAddressSpaceOrdering(),
1243  Position::BEFORE);
1244 
1245  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
1246  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
1247  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
1248  Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
1249  MOI.getOrderingAddrSpace(),
1250  Position::BEFORE);
1251 
1252  return Changed;
1253  }
1254 
1255  return Changed;
1256 }
1257 
1258 bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
1259  MachineBasicBlock::iterator &MI) {
1260  assert(MI->mayLoad() && MI->mayStore());
1261 
1262  bool Changed = false;
1263 
1264  if (MOI.isAtomic()) {
1265  if (MOI.getOrdering() == AtomicOrdering::Release ||
1266  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
1267  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
1268  MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
1269  Changed |= CC->insertWait(MI, MOI.getScope(),
1270  MOI.getOrderingAddrSpace(),
1271  SIMemOp::LOAD | SIMemOp::STORE,
1272  MOI.getIsCrossAddressSpaceOrdering(),
1273  Position::BEFORE);
1274 
1275  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
1276  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
1277  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
1278  MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
1279  MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
1280  Changed |= CC->insertWait(MI, MOI.getScope(),
1281  MOI.getOrderingAddrSpace(),
1282  isAtomicRet(*MI) ? SIMemOp::LOAD :
1283  SIMemOp::STORE,
1284  MOI.getIsCrossAddressSpaceOrdering(),
1285  Position::AFTER);
1286  Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
1287  MOI.getOrderingAddrSpace(),
1288  Position::AFTER);
1289  }
1290 
1291  return Changed;
1292  }
1293 
1294  return Changed;
1295 }
1296 
1297 bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
1298  bool Changed = false;
1299 
1300  SIMemOpAccess MOA(MF);
1301  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());
1302 
1303  for (auto &MBB : MF) {
1304  for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
1305 
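      // Unbundle memory-access bundles first so that each bundled instruction
      // can be inspected and legalized individually.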
1306  if (MI->getOpcode() == TargetOpcode::BUNDLE && MI->mayLoadOrStore()) {
1307  MachineBasicBlock::instr_iterator II(MI->getIterator());
1308  for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
1309  I != E && I->isBundledWithPred(); ++I) {
1310  I->unbundleFromPred();
1311  for (MachineOperand &MO : I->operands())
1312  if (MO.isReg())
1313  MO.setIsInternalRead(false);
1314  }
1315 
1316  MI->eraseFromParent();
1317  MI = II->getIterator();
1318  }
1319 
1320  if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
1321  continue;
1322 
1323  if (const auto &MOI = MOA.getLoadInfo(MI))
1324  Changed |= expandLoad(MOI.getValue(), MI);
1325  else if (const auto &MOI = MOA.getStoreInfo(MI))
1326  Changed |= expandStore(MOI.getValue(), MI);
1327  else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
1328  Changed |= expandAtomicFence(MOI.getValue(), MI);
1329  else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
1330  Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
1331  }
1332  }
1333 
1334  Changed |= removeAtomicPseudoMIs();
1335  return Changed;
1336 }
1337 
1338 INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)
1339 
1340 char SIMemoryLegalizer::ID = 0;
1341 char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
1342 
1343 FunctionPass *llvm::createSIMemoryLegalizerPass() {
1344  return new SIMemoryLegalizer();
1345 }