AtomicExpandPass.cpp
1 //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass (at IR level) to replace atomic instructions with
10 // __atomic_* library calls, or target-specific instructions which implement the
11 // same semantics in a way which better fits the target backend. This can
12 // include the use of (intrinsic-based) load-linked/store-conditional loops,
13 // AtomicCmpXchg, or type coercions.
14 //
15 //===----------------------------------------------------------------------===//
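// Illustrative example (editorial note, not from the upstream file): on a
// target whose TargetLowering hooks request CmpXChg expansion, an operation
// such as
//   %old = atomicrmw add i32* %p, i32 1 seq_cst
// is rewritten by this pass into a compare-and-swap loop roughly like
//   %init = load i32, i32* %p
//   br label %loop
// loop:
//   %loaded = phi i32 [ %init, %entry ], [ %new_loaded, %loop ]
//   %new = add i32 %loaded, 1
//   %pair = cmpxchg i32* %p, i32 %loaded, i32 %new seq_cst seq_cst
//   %new_loaded = extractvalue { i32, i1 } %pair, 0
//   %success = extractvalue { i32, i1 } %pair, 1
//   br i1 %success, label %end, label %loop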
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/STLFunctionalExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
21 #include "llvm/CodeGen/AtomicExpandUtils.h"
22 #include "llvm/CodeGen/RuntimeLibcalls.h"
23 #include "llvm/CodeGen/TargetLowering.h"
24 #include "llvm/CodeGen/TargetPassConfig.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/CodeGen/ValueTypes.h"
27 #include "llvm/IR/Attributes.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/Constant.h"
30 #include "llvm/IR/Constants.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/DerivedTypes.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/IRBuilder.h"
35 #include "llvm/IR/InstIterator.h"
36 #include "llvm/IR/Instruction.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/Type.h"
40 #include "llvm/IR/User.h"
41 #include "llvm/IR/Value.h"
42 #include "llvm/InitializePasses.h"
43 #include "llvm/Pass.h"
44 #include "llvm/Support/AtomicOrdering.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Debug.h"
51 #include <cassert>
52 #include <cstdint>
53 #include <iterator>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "atomic-expand"
58 
59 namespace {
60 
61 class AtomicExpand : public FunctionPass {
62  const TargetLowering *TLI = nullptr;
63 
64 public:
65  static char ID; // Pass identification, replacement for typeid
66 
67  AtomicExpand() : FunctionPass(ID) {
68  initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
69  }
70 
71  bool runOnFunction(Function &F) override;
72 
73 private:
74  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
75  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
76  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
77  bool tryExpandAtomicLoad(LoadInst *LI);
78  bool expandAtomicLoadToLL(LoadInst *LI);
79  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
80  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
81  bool tryExpandAtomicStore(StoreInst *SI);
82  void expandAtomicStore(StoreInst *SI);
83  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
84  AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
85  Value *
86  insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
87  Align AddrAlign, AtomicOrdering MemOpOrder,
88  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
89  void
90  expandAtomicOpToLLSC(Instruction *I, Type *ResultTy, Value *Addr,
91  Align AddrAlign, AtomicOrdering MemOpOrder,
92  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
93  void expandPartwordAtomicRMW(
94  AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
95  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
96  bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
97  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
98  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
99 
100  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
101  static Value *
102  insertRMWCmpXchgLoop(IRBuilder<> &Builder, Type *ResultType, Value *Addr,
103  Align AddrAlign, AtomicOrdering MemOpOrder,
104  SyncScope::ID SSID,
105  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
106  CreateCmpXchgInstFun CreateCmpXchg);
107  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
108 
109  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
110  bool isIdempotentRMW(AtomicRMWInst *RMWI);
111  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
112 
113  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
114  Value *PointerOperand, Value *ValueOperand,
115  Value *CASExpected, AtomicOrdering Ordering,
116  AtomicOrdering Ordering2,
117  ArrayRef<RTLIB::Libcall> Libcalls);
118  void expandAtomicLoadToLibcall(LoadInst *LI);
119  void expandAtomicStoreToLibcall(StoreInst *LI);
120  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
121  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
122 
123  friend bool
124  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
125  CreateCmpXchgInstFun CreateCmpXchg);
126 };
127 
128 } // end anonymous namespace
129 
130 char AtomicExpand::ID = 0;
131 
132 char &llvm::AtomicExpandID = AtomicExpand::ID;
133 
134 INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false,
135  false)
136 
137 FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
138 
139 // Helper functions to retrieve the size of atomic instructions.
140 static unsigned getAtomicOpSize(LoadInst *LI) {
141  const DataLayout &DL = LI->getModule()->getDataLayout();
142  return DL.getTypeStoreSize(LI->getType());
143 }
144 
145 static unsigned getAtomicOpSize(StoreInst *SI) {
146  const DataLayout &DL = SI->getModule()->getDataLayout();
147  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
148 }
149 
150 static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
151  const DataLayout &DL = RMWI->getModule()->getDataLayout();
152  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
153 }
154 
155 static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
156  const DataLayout &DL = CASI->getModule()->getDataLayout();
157  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
158 }
159 
160 // Determine if a particular atomic operation has a supported size,
161 // and is of appropriate alignment, to be passed through for target
162 // lowering. (Versus turning into a __atomic libcall)
163 template <typename Inst>
164 static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
165  unsigned Size = getAtomicOpSize(I);
166  Align Alignment = I->getAlign();
167  return Alignment >= Size &&
168  Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
169 }
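// Illustrative example (editorial note): if getMaxAtomicSizeInBitsSupported()
// is 64, an atomic i64 load with align 8 is passed through to target lowering,
// while the same load with align 4 (Alignment < Size) or an i128 access
// (Size > 8 bytes) is expanded to an __atomic_* libcall below.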
170 
171 bool AtomicExpand::runOnFunction(Function &F) {
172  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
173  if (!TPC)
174  return false;
175 
176  auto &TM = TPC->getTM<TargetMachine>();
177  if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
178  return false;
179  TLI = TM.getSubtargetImpl(F)->getTargetLowering();
180 
181  SmallVector<Instruction *, 1> AtomicInsts;
182 
183  // Changing control-flow while iterating through it is a bad idea, so gather a
184  // list of all atomic instructions before we start.
185  for (Instruction &I : instructions(F))
186  if (I.isAtomic() && !isa<FenceInst>(&I))
187  AtomicInsts.push_back(&I);
188 
189  bool MadeChange = false;
190  for (auto I : AtomicInsts) {
191  auto LI = dyn_cast<LoadInst>(I);
192  auto SI = dyn_cast<StoreInst>(I);
193  auto RMWI = dyn_cast<AtomicRMWInst>(I);
194  auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
195  assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
196 
197  // If the Size/Alignment is not supported, replace with a libcall.
198  if (LI) {
199  if (!atomicSizeSupported(TLI, LI)) {
200  expandAtomicLoadToLibcall(LI);
201  MadeChange = true;
202  continue;
203  }
204  } else if (SI) {
205  if (!atomicSizeSupported(TLI, SI)) {
206  expandAtomicStoreToLibcall(SI);
207  MadeChange = true;
208  continue;
209  }
210  } else if (RMWI) {
211  if (!atomicSizeSupported(TLI, RMWI)) {
212  expandAtomicRMWToLibcall(RMWI);
213  MadeChange = true;
214  continue;
215  }
216  } else if (CASI) {
217  if (!atomicSizeSupported(TLI, CASI)) {
218  expandAtomicCASToLibcall(CASI);
219  MadeChange = true;
220  continue;
221  }
222  }
223 
224  if (TLI->shouldInsertFencesForAtomic(I)) {
225  auto FenceOrdering = AtomicOrdering::Monotonic;
226  if (LI && isAcquireOrStronger(LI->getOrdering())) {
227  FenceOrdering = LI->getOrdering();
228  LI->setOrdering(AtomicOrdering::Monotonic);
229  } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
230  FenceOrdering = SI->getOrdering();
231  SI->setOrdering(AtomicOrdering::Monotonic);
232  } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
233  isAcquireOrStronger(RMWI->getOrdering()))) {
234  FenceOrdering = RMWI->getOrdering();
235  RMWI->setOrdering(AtomicOrdering::Monotonic);
236  } else if (CASI &&
237  TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
238  TargetLoweringBase::AtomicExpansionKind::LLSC &&
239  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
240  isAcquireOrStronger(CASI->getSuccessOrdering()) ||
241  isAcquireOrStronger(CASI->getFailureOrdering()))) {
242  // If a compare and swap is lowered to LL/SC, we can do smarter fence
243  // insertion, with a stronger one on the success path than on the
244  // failure path. As a result, fence insertion is directly done by
245  // expandAtomicCmpXchg in that case.
246  FenceOrdering = CASI->getMergedOrdering();
247  CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
248  CASI->setFailureOrdering(AtomicOrdering::Monotonic);
249  }
250 
251  if (FenceOrdering != AtomicOrdering::Monotonic) {
252  MadeChange |= bracketInstWithFences(I, FenceOrdering);
253  }
254  }
255 
256  if (LI) {
257  if (TLI->shouldCastAtomicLoadInIR(LI) ==
258  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
259  // TODO: add a TLI hook to control this so that each target can
260  // convert to lowering the original type one at a time.
261  LI = convertAtomicLoadToIntegerType(LI);
262  assert(LI->getType()->isIntegerTy() && "invariant broken");
263  MadeChange = true;
264  }
265 
266  MadeChange |= tryExpandAtomicLoad(LI);
267  } else if (SI) {
268  if (TLI->shouldCastAtomicStoreInIR(SI) ==
269  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
270  // TODO: add a TLI hook to control this so that each target can
271  // convert to lowering the original type one at a time.
272  SI = convertAtomicStoreToIntegerType(SI);
273  assert(SI->getValueOperand()->getType()->isIntegerTy() &&
274  "invariant broken");
275  MadeChange = true;
276  }
277 
278  if (tryExpandAtomicStore(SI))
279  MadeChange = true;
280  } else if (RMWI) {
281  // There are two different ways of expanding RMW instructions:
282  // - into a load if it is idempotent
283  // - into a Cmpxchg/LL-SC loop otherwise
284  // we try them in that order.
285 
286  if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
287  MadeChange = true;
288  } else {
289  AtomicRMWInst::BinOp Op = RMWI->getOperation();
290  if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
291  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
292  // TODO: add a TLI hook to control this so that each target can
293  // convert to lowering the original type one at a time.
294  RMWI = convertAtomicXchgToIntegerType(RMWI);
295  assert(RMWI->getValOperand()->getType()->isIntegerTy() &&
296  "invariant broken");
297  MadeChange = true;
298  }
299  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
300  unsigned ValueSize = getAtomicOpSize(RMWI);
301  if (ValueSize < MinCASSize &&
302  (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
303  Op == AtomicRMWInst::And)) {
304  RMWI = widenPartwordAtomicRMW(RMWI);
305  MadeChange = true;
306  }
307 
308  MadeChange |= tryExpandAtomicRMW(RMWI);
309  }
310  } else if (CASI) {
311  // TODO: when we're ready to make the change at the IR level, we can
312  // extend convertCmpXchgToInteger for floating point too.
313  assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
314  "unimplemented - floating point not legal at IR level");
315  if (CASI->getCompareOperand()->getType()->isPointerTy()) {
316  // TODO: add a TLI hook to control this so that each target can
317  // convert to lowering the original type one at a time.
318  CASI = convertCmpXchgToIntegerType(CASI);
319  assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
320  "invariant broken");
321  MadeChange = true;
322  }
323 
324  MadeChange |= tryExpandAtomicCmpXchg(CASI);
325  }
326  }
327  return MadeChange;
328 }
329 
330 bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
331  IRBuilder<> Builder(I);
332 
333  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
334 
335  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
336  // We have a guard here because not every atomic operation generates a
337  // trailing fence.
338  if (TrailingFence)
339  TrailingFence->moveAfter(I);
340 
341  return (LeadingFence || TrailingFence);
342 }
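// Illustrative effect (editorial note), assuming the default
// emitLeading/TrailingFence hooks: runOnFunction downgrades e.g. a seq_cst
// store to monotonic and this helper brackets it, giving roughly
//   fence seq_cst
//   store atomic i32 %v, i32* %p monotonic, align 4
//   fence seq_cst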
343 
344 /// Get the iX type with the same bitwidth as T.
345 IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
346  const DataLayout &DL) {
347  EVT VT = TLI->getMemValueType(DL, T);
348  unsigned BitWidth = VT.getStoreSizeInBits();
349  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
350  return IntegerType::get(T->getContext(), BitWidth);
351 }
352 
353 /// Convert an atomic load of a non-integral type to an integer load of the
354 /// equivalent bitwidth. See the function comment on
355 /// convertAtomicStoreToIntegerType for background.
356 LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
357  auto *M = LI->getModule();
358  Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
359 
360  IRBuilder<> Builder(LI);
361 
362  Value *Addr = LI->getPointerOperand();
363  Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
364  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
365 
366  auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
367  NewLI->setAlignment(LI->getAlign());
368  NewLI->setVolatile(LI->isVolatile());
369  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
370  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
371 
372  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
373  LI->replaceAllUsesWith(NewVal);
374  LI->eraseFromParent();
375  return NewLI;
376 }
377 
378 AtomicRMWInst *
379 AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
380  auto *M = RMWI->getModule();
381  Type *NewTy =
382  getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
383 
384  IRBuilder<> Builder(RMWI);
385 
386  Value *Addr = RMWI->getPointerOperand();
387  Value *Val = RMWI->getValOperand();
388  Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
389  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
390  Value *NewVal = Builder.CreateBitCast(Val, NewTy);
391 
392  auto *NewRMWI =
393  Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
394  RMWI->getAlign(), RMWI->getOrdering());
395  NewRMWI->setVolatile(RMWI->isVolatile());
396  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
397 
398  Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType());
399  RMWI->replaceAllUsesWith(NewRVal);
400  RMWI->eraseFromParent();
401  return NewRMWI;
402 }
403 
404 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
405  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
406  case TargetLoweringBase::AtomicExpansionKind::None:
407  return false;
408  case TargetLoweringBase::AtomicExpansionKind::LLSC:
409  expandAtomicOpToLLSC(
410  LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
411  LI->getOrdering(),
412  [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
413  return true;
414  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
415  return expandAtomicLoadToLL(LI);
416  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
417  return expandAtomicLoadToCmpXchg(LI);
418  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
419  LI->setAtomic(AtomicOrdering::NotAtomic);
420  return true;
421  default:
422  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
423  }
424 }
425 
426 bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) {
427  switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
428  case TargetLoweringBase::AtomicExpansionKind::None:
429  return false;
430  case TargetLoweringBase::AtomicExpansionKind::Expand:
431  expandAtomicStore(SI);
432  return true;
433  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
434  SI->setAtomic(AtomicOrdering::NotAtomic);
435  return true;
436  default:
437  llvm_unreachable("Unhandled case in tryExpandAtomicStore");
438  }
439 }
440 
441 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
442  IRBuilder<> Builder(LI);
443 
444  // On some architectures, load-linked instructions are atomic for larger
445  // sizes than normal loads. For example, the only 64-bit load guaranteed
446  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
447  Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
448  LI->getPointerOperand(), LI->getOrdering());
449  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
450 
451  LI->replaceAllUsesWith(Val);
452  LI->eraseFromParent();
453 
454  return true;
455 }
456 
457 bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
458  IRBuilder<> Builder(LI);
459  AtomicOrdering Order = LI->getOrdering();
460  if (Order == AtomicOrdering::Unordered)
461  Order = AtomicOrdering::Monotonic;
462 
463  Value *Addr = LI->getPointerOperand();
464  Type *Ty = LI->getType();
465  Constant *DummyVal = Constant::getNullValue(Ty);
466 
467  Value *Pair = Builder.CreateAtomicCmpXchg(
468  Addr, DummyVal, DummyVal, LI->getAlign(), Order,
469  AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
470  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
471 
472  LI->replaceAllUsesWith(Loaded);
473  LI->eraseFromParent();
474 
475  return true;
476 }
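// Illustrative expansion (editorial note): "load atomic i32, i32* %p acquire"
// becomes roughly
//   %pair = cmpxchg i32* %p, i32 0, i32 0 acquire acquire
//   %loaded = extractvalue { i32, i1 } %pair, 0
// i.e. a compare-exchange of the dummy value 0 with 0, which never changes the
// memory contents but yields the current value.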
477 
478 /// Convert an atomic store of a non-integral type to an integer store of the
479 /// equivalent bitwidth. We used to not support floating point or vector
480 /// atomics in the IR at all. The backends learned to deal with the bitcast
481 /// idiom because that was the only way of expressing the notion of an atomic
482 /// float or vector store. The long term plan is to teach each backend to
483 /// instruction select from the original atomic store, but as a migration
484 /// mechanism, we convert back to the old format which the backends understand.
485 /// Each backend will need individual work to recognize the new format.
486 StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
487  IRBuilder<> Builder(SI);
488  auto *M = SI->getModule();
489  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
490  M->getDataLayout());
491  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
492 
493  Value *Addr = SI->getPointerOperand();
494  Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
495  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
496 
497  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
498  NewSI->setAlignment(SI->getAlign());
499  NewSI->setVolatile(SI->isVolatile());
500  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
501  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
502  SI->eraseFromParent();
503  return NewSI;
504 }
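// Illustrative example (editorial note): "store atomic float %f, float* %p
// release, align 4" becomes
//   %1 = bitcast float %f to i32
//   %2 = bitcast float* %p to i32*
//   store atomic i32 %1, i32* %2 release, align 4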
505 
506 void AtomicExpand::expandAtomicStore(StoreInst *SI) {
507  // This function is only called on atomic stores that are too large to be
508  // atomic if implemented as a native store. So we replace them by an
509  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
510  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
511  // It is the responsibility of the target to only signal expansion via
512  // shouldExpandAtomicRMW in cases where this is required and possible.
513  IRBuilder<> Builder(SI);
514  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
515  AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
516  SI->getAlign(), SI->getOrdering());
517  SI->eraseFromParent();
518 
519  // Now we have an appropriate swap instruction, lower it as usual.
520  tryExpandAtomicRMW(AI);
521 }
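// Illustrative result (editorial note): "store atomic i64 %v, i64* %p seq_cst,
// align 8" is replaced by "atomicrmw xchg i64* %p, i64 %v seq_cst" whose result
// is unused; that xchg is then expanded by tryExpandAtomicRMW like any other
// RMW operation.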
522 
523 static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
524  Value *Loaded, Value *NewVal, Align AddrAlign,
525  AtomicOrdering MemOpOrder, SyncScope::ID SSID,
526  Value *&Success, Value *&NewLoaded) {
527  Type *OrigTy = NewVal->getType();
528 
529  // This code can go away when cmpxchg supports FP types.
530  bool NeedBitcast = OrigTy->isFloatingPointTy();
531  if (NeedBitcast) {
532  IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
533  unsigned AS = Addr->getType()->getPointerAddressSpace();
534  Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
535  NewVal = Builder.CreateBitCast(NewVal, IntTy);
536  Loaded = Builder.CreateBitCast(Loaded, IntTy);
537  }
538 
539  Value *Pair = Builder.CreateAtomicCmpXchg(
540  Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
541  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
542  Success = Builder.CreateExtractValue(Pair, 1, "success");
543  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
544 
545  if (NeedBitcast)
546  NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
547 }
548 
549 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
550  LLVMContext &Ctx = AI->getModule()->getContext();
551  TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
552  switch (Kind) {
553  case TargetLoweringBase::AtomicExpansionKind::None:
554  return false;
555  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
556  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
557  unsigned ValueSize = getAtomicOpSize(AI);
558  if (ValueSize < MinCASSize) {
559  expandPartwordAtomicRMW(AI,
560  TargetLoweringBase::AtomicExpansionKind::LLSC);
561  } else {
562  auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
563  return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
564  AI->getValOperand());
565  };
566  expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
567  AI->getAlign(), AI->getOrdering(), PerformOp);
568  }
569  return true;
570  }
571  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
572  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
573  unsigned ValueSize = getAtomicOpSize(AI);
574  if (ValueSize < MinCASSize) {
575  // TODO: Handle atomicrmw fadd/fsub
576  if (AI->getType()->isFloatingPointTy())
577  return false;
578 
579  expandPartwordAtomicRMW(AI,
580  TargetLoweringBase::AtomicExpansionKind::CmpXChg);
581  } else {
582  SmallVector<StringRef> SSNs;
583  Ctx.getSyncScopeNames(SSNs);
584  auto MemScope = SSNs[AI->getSyncScopeID()].empty()
585  ? "system"
586  : SSNs[AI->getSyncScopeID()];
587  OptimizationRemarkEmitter ORE(AI->getFunction());
588  ORE.emit([&]() {
589  return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
590  << "A compare and swap loop was generated for an atomic "
591  << AI->getOperationName(AI->getOperation()) << " operation at "
592  << MemScope << " memory scope";
593  });
594  expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
595  }
596  return true;
597  }
598  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
599  expandAtomicRMWToMaskedIntrinsic(AI);
600  return true;
601  }
602  case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
603  TLI->emitBitTestAtomicRMWIntrinsic(AI);
604  return true;
605  }
606  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
607  return lowerAtomicRMWInst(AI);
608  default:
609  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
610  }
611 }
612 
613 namespace {
614 
615 struct PartwordMaskValues {
616  // These three fields are guaranteed to be set by createMaskInstrs.
617  Type *WordType = nullptr;
618  Type *ValueType = nullptr;
619  Value *AlignedAddr = nullptr;
620  Align AlignedAddrAlignment;
621  // The remaining fields can be null.
622  Value *ShiftAmt = nullptr;
623  Value *Mask = nullptr;
624  Value *Inv_Mask = nullptr;
625 };
626 
627 LLVM_ATTRIBUTE_UNUSED
628 raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
629  auto PrintObj = [&O](auto *V) {
630  if (V)
631  O << *V;
632  else
633  O << "nullptr";
634  O << '\n';
635  };
636  O << "PartwordMaskValues {\n";
637  O << " WordType: ";
638  PrintObj(PMV.WordType);
639  O << " ValueType: ";
640  PrintObj(PMV.ValueType);
641  O << " AlignedAddr: ";
642  PrintObj(PMV.AlignedAddr);
643  O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
644  O << " ShiftAmt: ";
645  PrintObj(PMV.ShiftAmt);
646  O << " Mask: ";
647  PrintObj(PMV.Mask);
648  O << " Inv_Mask: ";
649  PrintObj(PMV.Inv_Mask);
650  O << "}\n";
651  return O;
652 }
653 
654 } // end anonymous namespace
655 
656 /// This is a helper function which builds instructions to provide
657 /// values necessary for partword atomic operations. It takes an
658 /// incoming address, Addr, and ValueType, and constructs the address,
659 /// shift-amounts and masks needed to work with a larger value of size
660 /// WordSize.
661 ///
662 /// AlignedAddr: Addr rounded down to a multiple of WordSize
663 ///
664 /// ShiftAmt: Number of bits to right-shift a WordSize value loaded
665 /// from AlignAddr for it to have the same value as if
666 /// ValueType was loaded from Addr.
667 ///
668 /// Mask: Value to mask with the value loaded from AlignAddr to
669 /// include only the part that would've been loaded from Addr.
670 ///
671 /// Inv_Mask: The inverse of Mask.
672 static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
673  Type *ValueType, Value *Addr,
674  Align AddrAlign,
675  unsigned MinWordSize) {
676  PartwordMaskValues PMV;
677 
678  Module *M = I->getModule();
679  LLVMContext &Ctx = M->getContext();
680  const DataLayout &DL = M->getDataLayout();
681  unsigned ValueSize = DL.getTypeStoreSize(ValueType);
682 
683  PMV.ValueType = ValueType;
684  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
685  : ValueType;
686  if (PMV.ValueType == PMV.WordType) {
687  PMV.AlignedAddr = Addr;
688  PMV.AlignedAddrAlignment = AddrAlign;
689  PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
690  PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
691  return PMV;
692  }
693 
694  assert(ValueSize < MinWordSize);
695 
696  Type *WordPtrType =
697  PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
698 
699  // TODO: we could skip some of this if AddrAlign >= MinWordSize.
700  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
701  PMV.AlignedAddr = Builder.CreateIntToPtr(
702  Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType,
703  "AlignedAddr");
704  PMV.AlignedAddrAlignment = Align(MinWordSize);
705 
706  Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
707  if (DL.isLittleEndian()) {
708  // turn bytes into bits
709  PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
710  } else {
711  // turn bytes into bits, and count from the other side.
712  PMV.ShiftAmt = Builder.CreateShl(
713  Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
714  }
715 
716  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
717  PMV.Mask = Builder.CreateShl(
718  ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
719  "Mask");
720  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
721  return PMV;
722 }
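// Illustrative example (editorial note): on a little-endian target with
// MinWordSize == 4, an i8 access at an address A with A % 4 == 3 yields
//   AlignedAddr = A & ~3          (with Align(4))
//   ShiftAmt    = 24              ( = (A & 3) * 8 )
//   Mask        = 0xFF000000      ( = 0xFF << ShiftAmt )
//   Inv_Mask    = 0x00FFFFFF
// On a big-endian target the byte offset is mirrored first, so the same
// address gives ShiftAmt = 0.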
723 
724 static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord,
725  const PartwordMaskValues &PMV) {
726  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
727  if (PMV.WordType == PMV.ValueType)
728  return WideWord;
729 
730  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
731  Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted");
732  return Trunc;
733 }
734 
735 static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord,
736  Value *Updated, const PartwordMaskValues &PMV) {
737  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
738  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
739  if (PMV.WordType == PMV.ValueType)
740  return Updated;
741 
742  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
743  Value *Shift =
744  Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
745  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
746  Value *Or = Builder.CreateOr(And, Shift, "inserted");
747  return Or;
748 }
749 
750 /// Emit IR to implement a masked version of a given atomicrmw
751 /// operation. (That is, only the bits under the Mask should be
752 /// affected by the operation)
753 static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
754  IRBuilder<> &Builder, Value *Loaded,
755  Value *Shifted_Inc, Value *Inc,
756  const PartwordMaskValues &PMV) {
757  // TODO: update to use
758  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
759  // to merge bits from two values without requiring PMV.Inv_Mask.
760  switch (Op) {
761  case AtomicRMWInst::Xchg: {
762  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
763  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
764  return FinalVal;
765  }
766  case AtomicRMWInst::Or:
767  case AtomicRMWInst::Xor:
768  case AtomicRMWInst::And:
769  llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
770  case AtomicRMWInst::Add:
771  case AtomicRMWInst::Sub:
772  case AtomicRMWInst::Nand: {
773  // The other arithmetic ops need to be masked into place.
774  Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
775  Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
776  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
777  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
778  return FinalVal;
779  }
780  case AtomicRMWInst::Max:
781  case AtomicRMWInst::Min:
782  case AtomicRMWInst::UMax:
783  case AtomicRMWInst::UMin: {
784  // Finally, comparison ops will operate on the full value, so
785  // truncate down to the original size, and expand out again after
786  // doing the operation.
787  Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
788  Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
789  Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
790  return FinalVal;
791  }
792  default:
793  llvm_unreachable("Unknown atomic op");
794  }
795 }
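// Illustrative example (editorial note): for a masked i8 'add' inside an i32
// word, the addition is performed on the pre-shifted operand, the result is
// and'ed with Mask so any carry out of the byte is discarded, and the
// untouched bytes (Loaded & Inv_Mask) are or'ed back in.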
796 
797 /// Expand a sub-word atomicrmw operation into an appropriate
798 /// word-sized operation.
799 ///
800 /// It will create an LL/SC or cmpxchg loop, as appropriate, the same
801 /// way as a typical atomicrmw expansion. The only difference here is
802 /// that the operation inside of the loop may operate upon only a
803 /// part of the value.
804 void AtomicExpand::expandPartwordAtomicRMW(
805  AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
806  AtomicOrdering MemOpOrder = AI->getOrdering();
807  SyncScope::ID SSID = AI->getSyncScopeID();
808 
809  IRBuilder<> Builder(AI);
810 
811  PartwordMaskValues PMV =
812  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
813  AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
814 
815  Value *ValOperand_Shifted =
816  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
817  PMV.ShiftAmt, "ValOperand_Shifted");
818 
819  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
820  return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
821  ValOperand_Shifted, AI->getValOperand(), PMV);
822  };
823 
824  Value *OldResult;
825  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
826  OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
827  PMV.AlignedAddrAlignment, MemOpOrder, SSID,
828  PerformPartwordOp, createCmpXchgInstFun);
829  } else {
830  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
831  OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
832  PMV.AlignedAddrAlignment, MemOpOrder,
833  PerformPartwordOp);
834  }
835 
836  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
837  AI->replaceAllUsesWith(FinalOldResult);
838  AI->eraseFromParent();
839 }
840 
841 // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
842 AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
843  IRBuilder<> Builder(AI);
844  AtomicRMWInst::BinOp Op = AI->getOperation();
845 
846  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
847  Op == AtomicRMWInst::And) &&
848  "Unable to widen operation");
849 
850  PartwordMaskValues PMV =
851  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
852  AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
853 
854  Value *ValOperand_Shifted =
855  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
856  PMV.ShiftAmt, "ValOperand_Shifted");
857 
858  Value *NewOperand;
859 
860  if (Op == AtomicRMWInst::And)
861  NewOperand =
862  Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
863  else
864  NewOperand = ValOperand_Shifted;
865 
866  AtomicRMWInst *NewAI =
867  Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand,
868  PMV.AlignedAddrAlignment, AI->getOrdering());
869 
870  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
871  AI->replaceAllUsesWith(FinalOldResult);
872  AI->eraseFromParent();
873  return NewAI;
874 }
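// Illustrative example (editorial note): with a 32-bit minimum cmpxchg width,
//   %old = atomicrmw or i8* %p, i8 %v monotonic
// is widened to an i32 'or' on the containing word using the zero-extended,
// shifted operand; %old is then recovered with an lshr/trunc of the result.
// For 'and', the shifted operand is first or'ed with Inv_Mask so the bytes
// outside the i8 are left unchanged.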
875 
876 bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
877  // The basic idea here is that we're expanding a cmpxchg of a
878  // smaller memory size up to a word-sized cmpxchg. To do this, we
879  // need to add a retry-loop for strong cmpxchg, so that
880  // modifications to other parts of the word don't cause a spurious
881  // failure.
882 
883  // This generates code like the following:
884  // [[Setup mask values PMV.*]]
885  // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
886  // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
887  // %InitLoaded = load i32* %addr
888  // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
889  // br partword.cmpxchg.loop
890  // partword.cmpxchg.loop:
891  // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
892  // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
893  // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
894  // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
895  // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
896  // i32 %FullWord_NewVal success_ordering failure_ordering
897  // %OldVal = extractvalue { i32, i1 } %NewCI, 0
898  // %Success = extractvalue { i32, i1 } %NewCI, 1
899  // br i1 %Success, label %partword.cmpxchg.end,
900  // label %partword.cmpxchg.failure
901  // partword.cmpxchg.failure:
902  // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
903  // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
904  // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
905  // label %partword.cmpxchg.end
906  // partword.cmpxchg.end:
907  // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
908  // %FinalOldVal = trunc i32 %tmp1 to i8
909  // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
910  // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
911 
912  Value *Addr = CI->getPointerOperand();
913  Value *Cmp = CI->getCompareOperand();
914  Value *NewVal = CI->getNewValOperand();
915 
916  BasicBlock *BB = CI->getParent();
917  Function *F = BB->getParent();
918  IRBuilder<> Builder(CI);
919  LLVMContext &Ctx = Builder.getContext();
920 
921  BasicBlock *EndBB =
922  BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
923  auto FailureBB =
924  BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
925  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
926 
927  // The split call above "helpfully" added a branch at the end of BB
928  // (to the wrong place).
929  std::prev(BB->end())->eraseFromParent();
930  Builder.SetInsertPoint(BB);
931 
932  PartwordMaskValues PMV =
933  createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
934  CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
935 
936  // Shift the incoming values over, into the right location in the word.
937  Value *NewVal_Shifted =
938  Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
939  Value *Cmp_Shifted =
940  Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
941 
942  // Load the entire current word, and mask into place the expected and new
943  // values
944  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
945  InitLoaded->setVolatile(CI->isVolatile());
946  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
947  Builder.CreateBr(LoopBB);
948 
949  // partword.cmpxchg.loop:
950  Builder.SetInsertPoint(LoopBB);
951  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
952  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
953 
954  // Mask/Or the expected and new values into place in the loaded word.
955  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
956  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
957  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
958  PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
959  CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
960  NewCI->setVolatile(CI->isVolatile());
961  // When we're building a strong cmpxchg, we need a loop, so you
962  // might think we could use a weak cmpxchg inside. But, using strong
963  // allows the below comparison for ShouldContinue, and we're
964  // expecting the underlying cmpxchg to be a machine instruction,
965  // which is strong anyways.
966  NewCI->setWeak(CI->isWeak());
967 
968  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
969  Value *Success = Builder.CreateExtractValue(NewCI, 1);
970 
971  if (CI->isWeak())
972  Builder.CreateBr(EndBB);
973  else
974  Builder.CreateCondBr(Success, EndBB, FailureBB);
975 
976  // partword.cmpxchg.failure:
977  Builder.SetInsertPoint(FailureBB);
978  // Upon failure, check whether the masked-out part of the loaded value
979  // has been modified. If it has not, the failure must have come from the
980  // masked-in part, so retrying cannot succeed and we leave the loop.
981  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
982  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
983  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
984 
985  // Add the second value to the phi from above
986  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
987 
988  // partword.cmpxchg.end:
989  Builder.SetInsertPoint(CI);
990 
991  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
992  Value *Res = UndefValue::get(CI->getType());
993  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
994  Res = Builder.CreateInsertValue(Res, Success, 1);
995 
996  CI->replaceAllUsesWith(Res);
997  CI->eraseFromParent();
998  return true;
999 }
1000 
1001 void AtomicExpand::expandAtomicOpToLLSC(
1002  Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1003  AtomicOrdering MemOpOrder,
1004  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
1005  IRBuilder<> Builder(I);
1006  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1007  MemOpOrder, PerformOp);
1008 
1009  I->replaceAllUsesWith(Loaded);
1010  I->eraseFromParent();
1011 }
1012 
1013 void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1014  IRBuilder<> Builder(AI);
1015 
1016  PartwordMaskValues PMV =
1017  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1018  AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1019 
1020  // The value operand must be sign-extended for signed min/max so that the
1021  // target's signed comparison instructions can be used. Otherwise, just
1022  // zero-ext.
1023  Instruction::CastOps CastOp = Instruction::ZExt;
1024  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1025  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1026  CastOp = Instruction::SExt;
1027 
1028  Value *ValOperand_Shifted = Builder.CreateShl(
1029  Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1030  PMV.ShiftAmt, "ValOperand_Shifted");
1031  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1032  Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1033  AI->getOrdering());
1034  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1035  AI->replaceAllUsesWith(FinalOldResult);
1036  AI->eraseFromParent();
1037 }
1038 
1039 void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
1040  IRBuilder<> Builder(CI);
1041 
1042  PartwordMaskValues PMV = createMaskInstrs(
1043  Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1044  CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1045 
1046  Value *CmpVal_Shifted = Builder.CreateShl(
1047  Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1048  "CmpVal_Shifted");
1049  Value *NewVal_Shifted = Builder.CreateShl(
1050  Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1051  "NewVal_Shifted");
1052  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1053  Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1054  CI->getMergedOrdering());
1055  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1056  Value *Res = UndefValue::get(CI->getType());
1057  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1058  Value *Success = Builder.CreateICmpEQ(
1059  CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1060  Res = Builder.CreateInsertValue(Res, Success, 1);
1061 
1062  CI->replaceAllUsesWith(Res);
1063  CI->eraseFromParent();
1064 }
1065 
1066 Value *AtomicExpand::insertRMWLLSCLoop(
1067  IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1068  AtomicOrdering MemOpOrder,
1069  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
1070  LLVMContext &Ctx = Builder.getContext();
1071  BasicBlock *BB = Builder.GetInsertBlock();
1072  Function *F = BB->getParent();
1073 
1074  assert(AddrAlign >=
1075  F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1076  "Expected at least natural alignment at this point.");
1077 
1078  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1079  //
1080  // The standard expansion we produce is:
1081  // [...]
1082  // atomicrmw.start:
1083  // %loaded = @load.linked(%addr)
1084  // %new = some_op iN %loaded, %incr
1085  // %stored = @store_conditional(%new, %addr)
1086  // %try_again = icmp i32 ne %stored, 0
1087  // br i1 %try_again, label %loop, label %atomicrmw.end
1088  // atomicrmw.end:
1089  // [...]
1090  BasicBlock *ExitBB =
1091  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1092  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1093 
1094  // The split call above "helpfully" added a branch at the end of BB (to the
1095  // wrong place).
1096  std::prev(BB->end())->eraseFromParent();
1097  Builder.SetInsertPoint(BB);
1098  Builder.CreateBr(LoopBB);
1099 
1100  // Start the main loop block now that we've taken care of the preliminaries.
1101  Builder.SetInsertPoint(LoopBB);
1102  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1103 
1104  Value *NewVal = PerformOp(Builder, Loaded);
1105 
1106  Value *StoreSuccess =
1107  TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1108  Value *TryAgain = Builder.CreateICmpNE(
1109  StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1110  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1111 
1112  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1113  return Loaded;
1114 }
1115 
1116 /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1117 /// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1118 /// IR. As a migration step, we convert back to what used to be the standard
1119 /// way to represent a pointer cmpxchg so that we can update backends one by
1120 /// one.
1121 AtomicCmpXchgInst *
1122 AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1123  auto *M = CI->getModule();
1124  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1125  M->getDataLayout());
1126 
1127  IRBuilder<> Builder(CI);
1128 
1129  Value *Addr = CI->getPointerOperand();
1130  Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
1131  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
1132 
1133  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1134  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1135 
1136  auto *NewCI = Builder.CreateAtomicCmpXchg(
1137  NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1138  CI->getFailureOrdering(), CI->getSyncScopeID());
1139  NewCI->setVolatile(CI->isVolatile());
1140  NewCI->setWeak(CI->isWeak());
1141  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1142 
1143  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1144  Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1145 
1146  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1147 
1148  Value *Res = UndefValue::get(CI->getType());
1149  Res = Builder.CreateInsertValue(Res, OldVal, 0);
1150  Res = Builder.CreateInsertValue(Res, Succ, 1);
1151 
1152  CI->replaceAllUsesWith(Res);
1153  CI->eraseFromParent();
1154  return NewCI;
1155 }
1156 
1157 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1158  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1159  AtomicOrdering FailureOrder = CI->getFailureOrdering();
1160  Value *Addr = CI->getPointerOperand();
1161  BasicBlock *BB = CI->getParent();
1162  Function *F = BB->getParent();
1163  LLVMContext &Ctx = F->getContext();
1164  // If shouldInsertFencesForAtomic() returns true, then the target does not
1165  // want to deal with memory orders, and emitLeading/TrailingFence should take
1166  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1167  // should preserve the ordering.
1168  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1169  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1170  ? AtomicOrdering::Monotonic
1171  : CI->getMergedOrdering();
1172 
1173  // In implementations which use a barrier to achieve release semantics, we can
1174  // delay emitting this barrier until we know a store is actually going to be
1175  // attempted. The cost of this delay is that we need 2 copies of the block
1176  // emitting the load-linked, affecting code size.
1177  //
1178  // Ideally, this logic would be unconditional except for the minsize check
1179  // since in other cases the extra blocks naturally collapse down to the
1180  // minimal loop. Unfortunately, this puts too much stress on later
1181  // optimisations so we avoid emitting the extra logic in those cases too.
1182  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1183  SuccessOrder != AtomicOrdering::Monotonic &&
1184  SuccessOrder != AtomicOrdering::Acquire &&
1185  !F->hasMinSize();
1186 
1187  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1188  // do it even on minsize.
1189  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1190 
1191  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1192  //
1193  // The full expansion we produce is:
1194  // [...]
1195  // %aligned.addr = ...
1196  // cmpxchg.start:
1197  // %unreleasedload = @load.linked(%aligned.addr)
1198  // %unreleasedload.extract = extract value from %unreleasedload
1199  // %should_store = icmp eq %unreleasedload.extract, %desired
1200  // br i1 %should_store, label %cmpxchg.releasingstore,
1201  // label %cmpxchg.nostore
1202  // cmpxchg.releasingstore:
1203  // fence?
1204  // br label cmpxchg.trystore
1205  // cmpxchg.trystore:
1206  // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1207  // [%releasedload, %cmpxchg.releasedload]
1208  // %updated.new = insert %new into %loaded.trystore
1209  // %stored = @store_conditional(%updated.new, %aligned.addr)
1210  // %success = icmp eq i32 %stored, 0
1211  // br i1 %success, label %cmpxchg.success,
1212  // label %cmpxchg.releasedload/%cmpxchg.failure
1213  // cmpxchg.releasedload:
1214  // %releasedload = @load.linked(%aligned.addr)
1215  // %releasedload.extract = extract value from %releasedload
1216  // %should_store = icmp eq %releasedload.extract, %desired
1217  // br i1 %should_store, label %cmpxchg.trystore,
1218  // label %cmpxchg.failure
1219  // cmpxchg.success:
1220  // fence?
1221  // br label %cmpxchg.end
1222  // cmpxchg.nostore:
1223  // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1224  // [%releasedload,
1225  // %cmpxchg.releasedload/%cmpxchg.trystore]
1226  // @load_linked_fail_balance()?
1227  // br label %cmpxchg.failure
1228  // cmpxchg.failure:
1229  // fence?
1230  // br label %cmpxchg.end
1231  // cmpxchg.end:
1232  // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1233  // [%loaded.trystore, %cmpxchg.trystore]
1234  // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1235  // %loaded = extract value from %loaded.exit
1236  // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1237  // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1238  // [...]
1239  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1240  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1241  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1242  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1243  auto ReleasedLoadBB =
1244  BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1245  auto TryStoreBB =
1246  BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1247  auto ReleasingStoreBB =
1248  BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1249  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1250 
1251  // This grabs the DebugLoc from CI
1252  IRBuilder<> Builder(CI);
1253 
1254  // The split call above "helpfully" added a branch at the end of BB (to the
1255  // wrong place), but we might want a fence too. It's easiest to just remove
1256  // the branch entirely.
1257  std::prev(BB->end())->eraseFromParent();
1258  Builder.SetInsertPoint(BB);
1259  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1260  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1261 
1262  PartwordMaskValues PMV =
1263  createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1264  CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1265  Builder.CreateBr(StartBB);
1266 
1267  // Start the main loop block now that we've taken care of the preliminaries.
1268  Builder.SetInsertPoint(StartBB);
1269  Value *UnreleasedLoad =
1270  TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1271  Value *UnreleasedLoadExtract =
1272  extractMaskedValue(Builder, UnreleasedLoad, PMV);
1273  Value *ShouldStore = Builder.CreateICmpEQ(
1274  UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1275 
1276  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1277  // jump straight past that fence instruction (if it exists).
1278  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1279 
1280  Builder.SetInsertPoint(ReleasingStoreBB);
1281  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1282  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1283  Builder.CreateBr(TryStoreBB);
1284 
1285  Builder.SetInsertPoint(TryStoreBB);
1286  PHINode *LoadedTryStore =
1287  Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1288  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1289  Value *NewValueInsert =
1290  insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1291  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1292  PMV.AlignedAddr, MemOpOrder);
1293  StoreSuccess = Builder.CreateICmpEQ(
1294  StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1295  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1296  Builder.CreateCondBr(StoreSuccess, SuccessBB,
1297  CI->isWeak() ? FailureBB : RetryBB);
1298 
1299  Builder.SetInsertPoint(ReleasedLoadBB);
1300  Value *SecondLoad;
1301  if (HasReleasedLoadBB) {
1302  SecondLoad =
1303  TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1304  Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1305  ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1306  CI->getCompareOperand(), "should_store");
1307 
1308  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1309  // jump straight past that fence instruction (if it exists).
1310  Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1311  // Update PHI node in TryStoreBB.
1312  LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1313  } else
1314  Builder.CreateUnreachable();
1315 
1316  // Make sure later instructions don't get reordered with a fence if
1317  // necessary.
1318  Builder.SetInsertPoint(SuccessBB);
1319  if (ShouldInsertFencesForAtomic)
1320  TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1321  Builder.CreateBr(ExitBB);
1322 
1323  Builder.SetInsertPoint(NoStoreBB);
1324  PHINode *LoadedNoStore =
1325  Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1326  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1327  if (HasReleasedLoadBB)
1328  LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1329 
1330  // In the failing case, where we don't execute the store-conditional, the
1331  // target might want to balance out the load-linked with a dedicated
1332  // instruction (e.g., on ARM, clearing the exclusive monitor).
1333  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1334  Builder.CreateBr(FailureBB);
1335 
1336  Builder.SetInsertPoint(FailureBB);
1337  PHINode *LoadedFailure =
1338  Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1339  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1340  if (CI->isWeak())
1341  LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1342  if (ShouldInsertFencesForAtomic)
1343  TLI->emitTrailingFence(Builder, CI, FailureOrder);
1344  Builder.CreateBr(ExitBB);
1345 
1346  // Finally, we have control-flow based knowledge of whether the cmpxchg
1347  // succeeded or not. We expose this to later passes by converting any
1348  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1349  // PHI.
1350  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1351  PHINode *LoadedExit =
1352  Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1353  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1354  LoadedExit->addIncoming(LoadedFailure, FailureBB);
1355  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1356  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1357  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1358 
1359  // This is the "exit value" from the cmpxchg expansion. It may be of
1360  // a type wider than the one in the cmpxchg instruction.
1361  Value *LoadedFull = LoadedExit;
1362 
1363  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1364  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1365 
1366  // Look for any users of the cmpxchg that are just comparing the loaded value
1367  // against the desired one, and replace them with the CFG-derived version.
1368  SmallVector<ExtractValueInst *, 2> PrunedInsts;
1369  for (auto User : CI->users()) {
1370  ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1371  if (!EV)
1372  continue;
1373 
1374  assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1375  "weird extraction from { iN, i1 }");
1376 
1377  if (EV->getIndices()[0] == 0)
1378  EV->replaceAllUsesWith(Loaded);
1379  else
1380  EV->replaceAllUsesWith(Success);
1381 
1382  PrunedInsts.push_back(EV);
1383  }
1384 
1385  // We can remove the instructions now we're no longer iterating through them.
1386  for (auto EV : PrunedInsts)
1387  EV->eraseFromParent();
1388 
1389  if (!CI->use_empty()) {
1390  // Some use of the full struct return that we don't understand has happened,
1391  // so we've got to reconstruct it properly.
1392  Value *Res;
1393  Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
1394  Res = Builder.CreateInsertValue(Res, Success, 1);
1395 
1396  CI->replaceAllUsesWith(Res);
1397  }
1398 
1399  CI->eraseFromParent();
1400  return true;
1401 }
1402 
1403 bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
1404  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1405  if (!C)
1406  return false;
1407 
1408  AtomicRMWInst::BinOp Op = RMWI->getOperation();
1409  switch (Op) {
1410  case AtomicRMWInst::Add:
1411  case AtomicRMWInst::Sub:
1412  case AtomicRMWInst::Or:
1413  case AtomicRMWInst::Xor:
1414  return C->isZero();
1415  case AtomicRMWInst::And:
1416  return C->isMinusOne();
1417  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1418  default:
1419  return false;
1420  }
1421 }
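// Illustrative examples (editorial note): "atomicrmw add i32* %p, i32 0" and
// "atomicrmw and i32* %p, i32 -1" never modify memory, so they only need the
// ordering guarantees of the corresponding atomic load.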
1422 
1423 bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1424  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1425  tryExpandAtomicLoad(ResultingLoad);
1426  return true;
1427  }
1428  return false;
1429 }
1430 
1431 Value *AtomicExpand::insertRMWCmpXchgLoop(
1432  IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1433  AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1434  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
1435  CreateCmpXchgInstFun CreateCmpXchg) {
1436  LLVMContext &Ctx = Builder.getContext();
1437  BasicBlock *BB = Builder.GetInsertBlock();
1438  Function *F = BB->getParent();
1439 
1440  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1441  //
1442  // The standard expansion we produce is:
1443  // [...]
1444  // %init_loaded = load atomic iN* %addr
1445  // br label %loop
1446  // loop:
1447  // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1448  // %new = some_op iN %loaded, %incr
1449  // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1450  // %new_loaded = extractvalue { iN, i1 } %pair, 0
1451  // %success = extractvalue { iN, i1 } %pair, 1
1452  // br i1 %success, label %atomicrmw.end, label %loop
1453  // atomicrmw.end:
1454  // [...]
1455  BasicBlock *ExitBB =
1456  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1457  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1458 
1459  // The split call above "helpfully" added a branch at the end of BB (to the
1460  // wrong place), but we want a load. It's easiest to just remove
1461  // the branch entirely.
1462  std::prev(BB->end())->eraseFromParent();
1463  Builder.SetInsertPoint(BB);
1464  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1465  Builder.CreateBr(LoopBB);
1466 
1467  // Start the main loop block now that we've taken care of the preliminaries.
1468  Builder.SetInsertPoint(LoopBB);
1469  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1470  Loaded->addIncoming(InitLoaded, BB);
1471 
1472  Value *NewVal = PerformOp(Builder, Loaded);
1473 
1474  Value *NewLoaded = nullptr;
1475  Value *Success = nullptr;
1476 
1477  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1478  MemOpOrder == AtomicOrdering::Unordered
1479  ? AtomicOrdering::Monotonic
1480  : MemOpOrder,
1481  SSID, Success, NewLoaded);
1482  assert(Success && NewLoaded);
1483 
1484  Loaded->addIncoming(NewLoaded, LoopBB);
1485 
1486  Builder.CreateCondBr(Success, ExitBB, LoopBB);
1487 
1488  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1489  return NewLoaded;
1490 }
1491 
1492 bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1493  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1494  unsigned ValueSize = getAtomicOpSize(CI);
1495 
1496  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1497  default:
1498  llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1499  case TargetLoweringBase::AtomicExpansionKind::None:
1500  if (ValueSize < MinCASSize)
1501  return expandPartwordCmpXchg(CI);
1502  return false;
1503  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1504  return expandAtomicCmpXchg(CI);
1505  }
1506  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1507  expandAtomicCmpXchgToMaskedIntrinsic(CI);
1508  return true;
1509  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1510  return lowerAtomicCmpXchgInst(CI);
1511  }
1512 }
1513 
1514 // Note: This function is exposed externally by AtomicExpandUtils.h
1515 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1516  CreateCmpXchgInstFun CreateCmpXchg) {
1517  IRBuilder<> Builder(AI);
1518  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
1519  Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1520  AI->getOrdering(), AI->getSyncScopeID(),
1521  [&](IRBuilder<> &Builder, Value *Loaded) {
1522  return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1523  AI->getValOperand());
1524  },
1525  CreateCmpXchg);
1526 
1527  AI->replaceAllUsesWith(Loaded);
1528  AI->eraseFromParent();
1529  return true;
1530 }
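// Illustrative usage (not part of the original source): a caller supplies a
// CreateCmpXchgInstFun that emits whatever compare-and-swap form the target
// supports; within this file the helper createCmpXchgInstFun (defined
// earlier) emits a plain cmpxchg, so a typical call is
//   expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);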
1531 
1532 // In order to use one of the sized library calls such as
1533 // __atomic_fetch_add_4, the alignment must be sufficient, the size
1534 // must be one of the potentially-specialized sizes, and the value
1535 // type must actually exist in C on the target (otherwise, the
1536 // function wouldn't actually be defined.)
1537 static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1538  const DataLayout &DL) {
1539  // TODO: "LargestSize" is an approximation for "largest type that
1540  // you can express in C". It seems to be the case that int128 is
1541  // supported on all 64-bit platforms; otherwise, only up to 64-bit
1542  // integers are supported. If we get this wrong, then we'll try to
1543  // call a sized libcall that doesn't actually exist. There should
1544  // really be some more reliable way in LLVM of determining integer
1545  // sizes which are valid in the target's C ABI...
1546  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1547  return Alignment >= Size &&
1548  (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1549  Size <= LargestSize;
1550 }
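// Illustrative example (not part of the original source): under the rule
// above, on a typical 64-bit target a naturally aligned 4-byte access
// qualifies for a sized call such as __atomic_fetch_add_4, whereas a 4-byte
// access with only 2-byte alignment, or an access of a non-power-of-two size
// such as 12 bytes, must fall back to the generic __atomic_* entry points.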
1551 
1552 void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
1553  static const RTLIB::Libcall Libcalls[6] = {
1554  RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1555  RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1556  unsigned Size = getAtomicOpSize(I);
1557 
1558  bool expanded = expandAtomicOpToLibcall(
1559  I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1560  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1561  if (!expanded)
1562  report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
1563 }
1564 
1565 void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
1566  static const RTLIB::Libcall Libcalls[6] = {
1567  RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1568  RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1569  unsigned Size = getAtomicOpSize(I);
1570 
1571  bool expanded = expandAtomicOpToLibcall(
1572  I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1573  nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1574  if (!expanded)
1575  report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
1576 }
1577 
1578 void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1579  static const RTLIB::Libcall Libcalls[6] = {
1580  RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1581  RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1582  RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1583  unsigned Size = getAtomicOpSize(I);
1584 
1585  bool expanded = expandAtomicOpToLibcall(
1586  I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1587  I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1588  Libcalls);
1589  if (!expanded)
1590  report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
1591 }
1592 
1593 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1594  static const RTLIB::Libcall LibcallsXchg[6] = {
1595  RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1596  RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1597  RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1598  static const RTLIB::Libcall LibcallsAdd[6] = {
1599  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1600  RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1601  RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1602  static const RTLIB::Libcall LibcallsSub[6] = {
1603  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1604  RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1605  RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1606  static const RTLIB::Libcall LibcallsAnd[6] = {
1607  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1608  RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1609  RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1610  static const RTLIB::Libcall LibcallsOr[6] = {
1611  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1612  RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1613  RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1614  static const RTLIB::Libcall LibcallsXor[6] = {
1615  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1616  RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1617  RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1618  static const RTLIB::Libcall LibcallsNand[6] = {
1619  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1620  RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1621  RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1622 
1623  switch (Op) {
1624  case AtomicRMWInst::BAD_BINOP:
1625  llvm_unreachable("Should not have BAD_BINOP.");
1626  case AtomicRMWInst::Xchg:
1627  return makeArrayRef(LibcallsXchg);
1628  case AtomicRMWInst::Add:
1629  return makeArrayRef(LibcallsAdd);
1630  case AtomicRMWInst::Sub:
1631  return makeArrayRef(LibcallsSub);
1632  case AtomicRMWInst::And:
1633  return makeArrayRef(LibcallsAnd);
1634  case AtomicRMWInst::Or:
1635  return makeArrayRef(LibcallsOr);
1636  case AtomicRMWInst::Xor:
1637  return makeArrayRef(LibcallsXor);
1638  case AtomicRMWInst::Nand:
1639  return makeArrayRef(LibcallsNand);
1640  case AtomicRMWInst::Max:
1641  case AtomicRMWInst::Min:
1642  case AtomicRMWInst::UMax:
1643  case AtomicRMWInst::UMin:
1644  case AtomicRMWInst::FAdd:
1645  case AtomicRMWInst::FSub:
1646  // No atomic libcalls are available for max/min/umax/umin or for fadd/fsub.
1647  return {};
1648  }
1649  llvm_unreachable("Unexpected AtomicRMW operation.");
1650 }
1651 
1652 void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1653  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1654 
1655  unsigned Size = getAtomicOpSize(I);
1656 
1657  bool Success = false;
1658  if (!Libcalls.empty())
1659  Success = expandAtomicOpToLibcall(
1660  I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1661  nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1662 
1663  // The expansion failed: either there were no libcalls at all for
1664  // the operation (min/max), or there were only size-specialized
1665  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1666  // CAS libcall, via a CAS loop, instead.
1667  if (!Success) {
1668  expandAtomicRMWToCmpXchg(
1669  I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded,
1670  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1671  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
1672  // Create the CAS instruction normally...
1673  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1674  Addr, Loaded, NewVal, Alignment, MemOpOrder,
1675  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1676  Success = Builder.CreateExtractValue(Pair, 1, "success");
1677  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1678 
1679  // ...and then expand the CAS into a libcall.
1680  expandAtomicCASToLibcall(Pair);
1681  });
1682  }
1683 }
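// Illustrative example (not part of the original source): min/max and the
// floating-point RMW operations have no __atomic_fetch_* libcall, so when
// such an operation has to be lowered to libcalls it becomes a loop of the
// kind built by insertRMWCmpXchgLoop, with the inner cmpxchg expanded by
// expandAtomicCASToLibcall into __atomic_compare_exchange_N or the generic
// __atomic_compare_exchange.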
1684 
1685 // A helper routine for the above expandAtomic*ToLibcall functions.
1686 //
1687 // 'Libcalls' contains an array of enum values for the particular
1688 // ATOMIC libcalls to be emitted. All of the other arguments besides
1689 // 'I' are extracted from the Instruction subclass by the
1690 // caller. Depending on the particular call, some will be null.
1691 bool AtomicExpand::expandAtomicOpToLibcall(
1692  Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1693  Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1694  AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1695  assert(Libcalls.size() == 6);
1696 
1697  LLVMContext &Ctx = I->getContext();
1698  Module *M = I->getModule();
1699  const DataLayout &DL = M->getDataLayout();
1700  IRBuilder<> Builder(I);
1701  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1702 
1703  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1704  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1705 
1706  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1707 
1708  // TODO: the "order" argument type is "int", not int32. So
1709  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1710  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1711  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1712  Constant *OrderingVal =
1713  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1714  Constant *Ordering2Val = nullptr;
1715  if (CASExpected) {
1716  assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1717  Ordering2Val =
1718  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1719  }
1720  bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1721 
1722  RTLIB::Libcall RTLibType;
1723  if (UseSizedLibcall) {
1724  switch (Size) {
1725  case 1:
1726  RTLibType = Libcalls[1];
1727  break;
1728  case 2:
1729  RTLibType = Libcalls[2];
1730  break;
1731  case 4:
1732  RTLibType = Libcalls[3];
1733  break;
1734  case 8:
1735  RTLibType = Libcalls[4];
1736  break;
1737  case 16:
1738  RTLibType = Libcalls[5];
1739  break;
1740  }
1741  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1742  RTLibType = Libcalls[0];
1743  } else {
1744  // Can't use sized function, and there's no generic for this
1745  // operation, so give up.
1746  return false;
1747  }
1748 
1749  if (!TLI->getLibcallName(RTLibType)) {
1750  // This target does not implement the requested atomic libcall so give up.
1751  return false;
1752  }
1753 
1754  // Build up the function call. There are two kinds. First, the sized
1755  // variants. These calls are going to be one of the following (with
1756  // N=1,2,4,8,16):
1757  // iN __atomic_load_N(iN *ptr, int ordering)
1758  // void __atomic_store_N(iN *ptr, iN val, int ordering)
1759  // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1760  // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1761  // int success_order, int failure_order)
1762  //
1763  // Note that these functions can be used for non-integer atomic
1764  // operations, the values just need to be bitcast to integers on the
1765  // way in and out.
1766  //
1767  // And, then, the generic variants. They look like the following:
1768  // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1769  // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1770  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1771  // int ordering)
1772  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1773  // void *desired, int success_order,
1774  // int failure_order)
1775  //
1776  // The different signatures are built up depending on the
1777  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1778  // variables.
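// Illustrative example (not part of the original source): for a sized-libcall
// case such as 'atomicrmw xchg i32* %p, i32 %v seq_cst' the code below
// produces roughly
//   %r = call i32 @__atomic_exchange_4(i8* %p.i8, i32 %v, i32 5)
// (5 being the C ABI encoding of seq_cst), while an operation that fails
// canUseSizedAtomicCall passes its value and result through stack temporaries
// to the generic form
//   call void @__atomic_exchange(i64 %size, i8* %p.i8, i8* %val.i8, i8* %ret.i8, i32 5)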
1779 
1780  AllocaInst *AllocaCASExpected = nullptr;
1781  Value *AllocaCASExpected_i8 = nullptr;
1782  AllocaInst *AllocaValue = nullptr;
1783  Value *AllocaValue_i8 = nullptr;
1784  AllocaInst *AllocaResult = nullptr;
1785  Value *AllocaResult_i8 = nullptr;
1786 
1787  Type *ResultTy;
1788  SmallVector<Value *, 6> Args;
1789  AttributeList Attr;
1790 
1791  // 'size' argument.
1792  if (!UseSizedLibcall) {
1793  // Note, getIntPtrType is assumed equivalent to size_t.
1794  Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1795  }
1796 
1797  // 'ptr' argument.
1798  // note: This assumes all address spaces share a common libfunc
1799  // implementation and that addresses are convertible. For systems without
1800  // that property, we'd need to extend this mechanism to support AS-specific
1801  // families of atomic intrinsics.
1802  auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
1803  Value *PtrVal =
1804  Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx, PtrTypeAS));
1805  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
1806  Args.push_back(PtrVal);
1807 
1808  // 'expected' argument, if present.
1809  if (CASExpected) {
1810  AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1811  AllocaCASExpected->setAlignment(AllocaAlignment);
1812  unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
1813 
1814  AllocaCASExpected_i8 = Builder.CreateBitCast(
1815  AllocaCASExpected, Type::getInt8PtrTy(Ctx, AllocaAS));
1816  Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
1817  Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1818  Args.push_back(AllocaCASExpected_i8);
1819  }
1820 
1821  // 'val' argument ('desired' for cas), if present.
1822  if (ValueOperand) {
1823  if (UseSizedLibcall) {
1824  Value *IntValue =
1825  Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1826  Args.push_back(IntValue);
1827  } else {
1828  AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1829  AllocaValue->setAlignment(AllocaAlignment);
1830  AllocaValue_i8 =
1831  Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
1832  Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
1833  Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1834  Args.push_back(AllocaValue_i8);
1835  }
1836  }
1837 
1838  // 'ret' argument.
1839  if (!CASExpected && HasResult && !UseSizedLibcall) {
1840  AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1841  AllocaResult->setAlignment(AllocaAlignment);
1842  unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
1843  AllocaResult_i8 =
1844  Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
1845  Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
1846  Args.push_back(AllocaResult_i8);
1847  }
1848 
1849  // 'ordering' ('success_order' for cas) argument.
1850  Args.push_back(OrderingVal);
1851 
1852  // 'failure_order' argument, if present.
1853  if (Ordering2Val)
1854  Args.push_back(Ordering2Val);
1855 
1856  // Now, the return type.
1857  if (CASExpected) {
1858  ResultTy = Type::getInt1Ty(Ctx);
1859  Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
1860  } else if (HasResult && UseSizedLibcall)
1861  ResultTy = SizedIntTy;
1862  else
1863  ResultTy = Type::getVoidTy(Ctx);
1864 
1865  // Done with setting up arguments and return types, create the call:
1866  SmallVector<Type *, 6> ArgTys;
1867  for (Value *Arg : Args)
1868  ArgTys.push_back(Arg->getType());
1869  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1870  FunctionCallee LibcallFn =
1871  M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1872  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1873  Call->setAttributes(Attr);
1874  Value *Result = Call;
1875 
1876  // And then, extract the results...
1877  if (ValueOperand && !UseSizedLibcall)
1878  Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
1879 
1880  if (CASExpected) {
1881  // The final result from the CAS is {load of 'expected' alloca, bool result
1882  // from call}
1883  Type *FinalResultTy = I->getType();
1884  Value *V = UndefValue::get(FinalResultTy);
1885  Value *ExpectedOut = Builder.CreateAlignedLoad(
1886  CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
1887  Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
1888  V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1889  V = Builder.CreateInsertValue(V, Result, 1);
1890  I->replaceAllUsesWith(V);
1891  } else if (HasResult) {
1892  Value *V;
1893  if (UseSizedLibcall)
1894  V = Builder.CreateBitOrPointerCast(Result, I->getType());
1895  else {
1896  V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
1897  AllocaAlignment);
1898  Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
1899  }
1900  I->replaceAllUsesWith(V);
1901  }
1902  I->eraseFromParent();
1903  return true;
1904 }