AtomicExpandPass.cpp (LLVM 10.0.0svn)
1 //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass (at IR level) to replace atomic instructions with
10 // __atomic_* library calls, or target-specific instructions which implement
11 // the same semantics in a way that better fits the target backend. This can
12 // include the use of (intrinsic-based) load-linked/store-conditional loops,
13 // AtomicCmpXchg, or type coercions.
14 //
15 //===----------------------------------------------------------------------===//
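//
// As a rough illustration (not taken from any particular target): an IR-level
// read-modify-write such as
//
//   %old = atomicrmw add i32* %p, i32 1 seq_cst
//
// may be left alone, turned into a __atomic_fetch_add_4 library call when its
// size or alignment is unsupported, or rewritten into a cmpxchg or
// load-linked/store-conditional retry loop, depending on what the target's
// shouldExpandAtomicRMWInIR hook requests.
//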
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/CodeGen/AtomicExpandUtils.h"
21 #include "llvm/CodeGen/RuntimeLibcalls.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetPassConfig.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/CodeGen/ValueTypes.h"
26 #include "llvm/IR/Attributes.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constant.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/InstIterator.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/IR/User.h"
40 #include "llvm/IR/Value.h"
41 #include "llvm/Pass.h"
42 #include "llvm/Support/AtomicOrdering.h"
43 #include "llvm/Support/Casting.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Support/ErrorHandling.h"
46 #include "llvm/Support/raw_ostream.h"
47 #include "llvm/Target/TargetMachine.h"
48 #include <cassert>
49 #include <cstdint>
50 #include <iterator>
51 
52 using namespace llvm;
53 
54 #define DEBUG_TYPE "atomic-expand"
55 
56 namespace {
57 
58  class AtomicExpand: public FunctionPass {
59  const TargetLowering *TLI = nullptr;
60 
61  public:
62  static char ID; // Pass identification, replacement for typeid
63 
64  AtomicExpand() : FunctionPass(ID) {
 65  initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
66  }
67 
68  bool runOnFunction(Function &F) override;
69 
70  private:
71  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
72  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
73  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
74  bool tryExpandAtomicLoad(LoadInst *LI);
75  bool expandAtomicLoadToLL(LoadInst *LI);
76  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
77  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
78  bool expandAtomicStore(StoreInst *SI);
79  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
80  Value *
81  insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
82  AtomicOrdering MemOpOrder,
83  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
84  void expandAtomicOpToLLSC(
85  Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
86  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
87  void expandPartwordAtomicRMW(
88  AtomicRMWInst *I,
 89  TargetLoweringBase::AtomicExpansionKind ExpansionKind);
90  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
91  void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
92  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
93  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
94 
95  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
96  static Value *insertRMWCmpXchgLoop(
97  IRBuilder<> &Builder, Type *ResultType, Value *Addr,
98  AtomicOrdering MemOpOrder,
99  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
100  CreateCmpXchgInstFun CreateCmpXchg);
101  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
102 
103  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
104  bool isIdempotentRMW(AtomicRMWInst *RMWI);
105  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
106 
107  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
108  Value *PointerOperand, Value *ValueOperand,
109  Value *CASExpected, AtomicOrdering Ordering,
110  AtomicOrdering Ordering2,
111  ArrayRef<RTLIB::Libcall> Libcalls);
112  void expandAtomicLoadToLibcall(LoadInst *LI);
113  void expandAtomicStoreToLibcall(StoreInst *LI);
114  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
115  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
116 
117  friend bool
 118  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
119  CreateCmpXchgInstFun CreateCmpXchg);
120  };
121 
122 } // end anonymous namespace
123 
124 char AtomicExpand::ID = 0;
125 
 126 char &llvm::AtomicExpandID = AtomicExpand::ID;
127 
128 INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
129  false, false)
130 
131 FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
132 
133 // Helper functions to retrieve the size of atomic instructions.
134 static unsigned getAtomicOpSize(LoadInst *LI) {
135  const DataLayout &DL = LI->getModule()->getDataLayout();
136  return DL.getTypeStoreSize(LI->getType());
137 }
138 
139 static unsigned getAtomicOpSize(StoreInst *SI) {
140  const DataLayout &DL = SI->getModule()->getDataLayout();
141  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
142 }
143 
144 static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
145  const DataLayout &DL = RMWI->getModule()->getDataLayout();
146  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
147 }
148 
149 static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
150  const DataLayout &DL = CASI->getModule()->getDataLayout();
151  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
152 }
153 
154 // Helper functions to retrieve the alignment of atomic instructions.
155 static unsigned getAtomicOpAlign(LoadInst *LI) {
156  unsigned Align = LI->getAlignment();
157  // In the future, if this IR restriction is relaxed, we should
158  // return DataLayout::getABITypeAlignment when there's no align
159  // value.
160  assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
161  return Align;
162 }
163 
164 static unsigned getAtomicOpAlign(StoreInst *SI) {
165  unsigned Align = SI->getAlignment();
166  // In the future, if this IR restriction is relaxed, we should
167  // return DataLayout::getABITypeAlignment when there's no align
168  // value.
169  assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
170  return Align;
171 }
172 
173 static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
174  // TODO(PR27168): This instruction has no alignment attribute, but unlike the
175  // default alignment for load/store, the default here is to assume
176  // it has NATURAL alignment, not DataLayout-specified alignment.
177  const DataLayout &DL = RMWI->getModule()->getDataLayout();
178  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
179 }
180 
181 static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
182  // TODO(PR27168): same comment as above.
183  const DataLayout &DL = CASI->getModule()->getDataLayout();
184  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
185 }
186 
187 // Determine if a particular atomic operation has a supported size,
188 // and is of appropriate alignment, to be passed through for target
189 // lowering. (Versus turning into a __atomic libcall)
190 template <typename Inst>
191 static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
192  unsigned Size = getAtomicOpSize(I);
193  unsigned Align = getAtomicOpAlign(I);
194  return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
195 }
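// Illustrative example (assuming a target whose getMaxAtomicSizeInBitsSupported
// returns 64): a naturally aligned 8-byte atomic load passes this check and is
// handed to the target lowering, while a 16-byte one fails the size test and is
// routed to expandAtomicLoadToLibcall, i.e. an __atomic_load* call.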
196 
 197 bool AtomicExpand::runOnFunction(Function &F) {
198  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
199  if (!TPC)
200  return false;
201 
202  auto &TM = TPC->getTM<TargetMachine>();
203  if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
204  return false;
205  TLI = TM.getSubtargetImpl(F)->getTargetLowering();
206 
207  SmallVector<Instruction *, 1> AtomicInsts;
208 
209  // Changing control-flow while iterating through it is a bad idea, so gather a
210  // list of all atomic instructions before we start.
211  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
212  Instruction *I = &*II;
213  if (I->isAtomic() && !isa<FenceInst>(I))
214  AtomicInsts.push_back(I);
215  }
216 
217  bool MadeChange = false;
218  for (auto I : AtomicInsts) {
219  auto LI = dyn_cast<LoadInst>(I);
220  auto SI = dyn_cast<StoreInst>(I);
221  auto RMWI = dyn_cast<AtomicRMWInst>(I);
222  auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
223  assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
224 
225  // If the Size/Alignment is not supported, replace with a libcall.
226  if (LI) {
227  if (!atomicSizeSupported(TLI, LI)) {
228  expandAtomicLoadToLibcall(LI);
229  MadeChange = true;
230  continue;
231  }
232  } else if (SI) {
233  if (!atomicSizeSupported(TLI, SI)) {
234  expandAtomicStoreToLibcall(SI);
235  MadeChange = true;
236  continue;
237  }
238  } else if (RMWI) {
239  if (!atomicSizeSupported(TLI, RMWI)) {
240  expandAtomicRMWToLibcall(RMWI);
241  MadeChange = true;
242  continue;
243  }
244  } else if (CASI) {
245  if (!atomicSizeSupported(TLI, CASI)) {
246  expandAtomicCASToLibcall(CASI);
247  MadeChange = true;
248  continue;
249  }
250  }
251 
252  if (TLI->shouldInsertFencesForAtomic(I)) {
253  auto FenceOrdering = AtomicOrdering::Monotonic;
254  if (LI && isAcquireOrStronger(LI->getOrdering())) {
255  FenceOrdering = LI->getOrdering();
256  LI->setOrdering(AtomicOrdering::Monotonic);
257  } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
258  FenceOrdering = SI->getOrdering();
259  SI->setOrdering(AtomicOrdering::Monotonic);
260  } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
261  isAcquireOrStronger(RMWI->getOrdering()))) {
262  FenceOrdering = RMWI->getOrdering();
263  RMWI->setOrdering(AtomicOrdering::Monotonic);
264  } else if (CASI &&
265  TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
 266  TargetLoweringBase::AtomicExpansionKind::None &&
267  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
268  isAcquireOrStronger(CASI->getSuccessOrdering()))) {
269  // If a compare and swap is lowered to LL/SC, we can do smarter fence
270  // insertion, with a stronger one on the success path than on the
271  // failure path. As a result, fence insertion is directly done by
272  // expandAtomicCmpXchg in that case.
273  FenceOrdering = CASI->getSuccessOrdering();
274  CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
275  CASI->setFailureOrdering(AtomicOrdering::Monotonic);
276  }
277 
278  if (FenceOrdering != AtomicOrdering::Monotonic) {
279  MadeChange |= bracketInstWithFences(I, FenceOrdering);
280  }
281  }
282 
283  if (LI) {
284  if (LI->getType()->isFloatingPointTy()) {
285  // TODO: add a TLI hook to control this so that each target can
286  // convert to lowering the original type one at a time.
287  LI = convertAtomicLoadToIntegerType(LI);
288  assert(LI->getType()->isIntegerTy() && "invariant broken");
289  MadeChange = true;
290  }
291 
292  MadeChange |= tryExpandAtomicLoad(LI);
293  } else if (SI) {
294  if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
295  // TODO: add a TLI hook to control this so that each target can
296  // convert to lowering the original type one at a time.
297  SI = convertAtomicStoreToIntegerType(SI);
298  assert(SI->getValueOperand()->getType()->isIntegerTy() &&
299  "invariant broken");
300  MadeChange = true;
301  }
302 
303  if (TLI->shouldExpandAtomicStoreInIR(SI))
304  MadeChange |= expandAtomicStore(SI);
305  } else if (RMWI) {
306  // There are two different ways of expanding RMW instructions:
307  // - into a load if it is idempotent
308  // - into a Cmpxchg/LL-SC loop otherwise
309  // we try them in that order.
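  // For example (illustrative, not target-specific): "atomicrmw or i32* %p,
  // i32 0 seq_cst" is idempotent, and a target whose
  // lowerIdempotentRMWIntoFencedLoad hook rewrites it into a fenced atomic
  // load only needs tryExpandAtomicLoad on the result.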
310 
311  if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
312  MadeChange = true;
313  } else {
314  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
315  unsigned ValueSize = getAtomicOpSize(RMWI);
316  AtomicRMWInst::BinOp Op = RMWI->getOperation();
317  if (ValueSize < MinCASSize &&
318  (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
319  Op == AtomicRMWInst::And)) {
320  RMWI = widenPartwordAtomicRMW(RMWI);
321  MadeChange = true;
322  }
323 
324  MadeChange |= tryExpandAtomicRMW(RMWI);
325  }
326  } else if (CASI) {
327  // TODO: when we're ready to make the change at the IR level, we can
328  // extend convertCmpXchgToInteger for floating point too.
329  assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
330  "unimplemented - floating point not legal at IR level");
 331  if (CASI->getCompareOperand()->getType()->isPointerTy()) {
332  // TODO: add a TLI hook to control this so that each target can
333  // convert to lowering the original type one at a time.
334  CASI = convertCmpXchgToIntegerType(CASI);
335  assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
336  "invariant broken");
337  MadeChange = true;
338  }
339 
340  MadeChange |= tryExpandAtomicCmpXchg(CASI);
341  }
342  }
343  return MadeChange;
344 }
345 
346 bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
347  IRBuilder<> Builder(I);
348 
349  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
350 
351  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
352  // We have a guard here because not every atomic operation generates a
353  // trailing fence.
354  if (TrailingFence)
355  TrailingFence->moveAfter(I);
356 
357  return (LeadingFence || TrailingFence);
358 }
359 
360 /// Get the iX type with the same bitwidth as T.
361 IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
362  const DataLayout &DL) {
363  EVT VT = TLI->getMemValueType(DL, T);
364  unsigned BitWidth = VT.getStoreSizeInBits();
365  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
366  return IntegerType::get(T->getContext(), BitWidth);
367 }
368 
369 /// Convert an atomic load of a non-integral type to an integer load of the
370 /// equivalent bitwidth. See the function comment on
371 /// convertAtomicStoreToIntegerType for background.
372 LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
373  auto *M = LI->getModule();
374  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
375  M->getDataLayout());
376 
377  IRBuilder<> Builder(LI);
378 
379  Value *Addr = LI->getPointerOperand();
380  Type *PT = PointerType::get(NewTy,
381  Addr->getType()->getPointerAddressSpace());
382  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
383 
384  auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
385  NewLI->setAlignment(LI->getAlignment());
386  NewLI->setVolatile(LI->isVolatile());
387  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
388  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
389 
390  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
391  LI->replaceAllUsesWith(NewVal);
392  LI->eraseFromParent();
393  return NewLI;
394 }
395 
396 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
397  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
 398  case TargetLoweringBase::AtomicExpansionKind::None:
399  return false;
 400  case TargetLoweringBase::AtomicExpansionKind::LLSC:
401  expandAtomicOpToLLSC(
402  LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
403  [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
404  return true;
 405  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
406  return expandAtomicLoadToLL(LI);
 407  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
408  return expandAtomicLoadToCmpXchg(LI);
409  default:
410  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
411  }
412 }
413 
414 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
415  IRBuilder<> Builder(LI);
416 
417  // On some architectures, load-linked instructions are atomic for larger
418  // sizes than normal loads. For example, the only 64-bit load guaranteed
419  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
420  Value *Val =
421  TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
422  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
423 
424  LI->replaceAllUsesWith(Val);
425  LI->eraseFromParent();
426 
427  return true;
428 }
429 
430 bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
431  IRBuilder<> Builder(LI);
432  AtomicOrdering Order = LI->getOrdering();
433  if (Order == AtomicOrdering::Unordered)
 434  Order = AtomicOrdering::Monotonic;
435 
436  Value *Addr = LI->getPointerOperand();
437  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
438  Constant *DummyVal = Constant::getNullValue(Ty);
439 
440  Value *Pair = Builder.CreateAtomicCmpXchg(
441  Addr, DummyVal, DummyVal, Order,
 442  AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
443  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
444 
445  LI->replaceAllUsesWith(Loaded);
446  LI->eraseFromParent();
447 
448  return true;
449 }
450 
451 /// Convert an atomic store of a non-integral type to an integer store of the
452 /// equivalent bitwidth. We used to not support floating point or vector
453 /// atomics in the IR at all. The backends learned to deal with the bitcast
 454 /// idiom because that was the only way of expressing the notion of an atomic
455 /// float or vector store. The long term plan is to teach each backend to
456 /// instruction select from the original atomic store, but as a migration
457 /// mechanism, we convert back to the old format which the backends understand.
458 /// Each backend will need individual work to recognize the new format.
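///
/// For example (illustrative): "store atomic float %f, float* %p unordered,
/// align 4" becomes a bitcast of %f to i32, a bitcast of %p to i32*, and an
/// atomic i32 store carrying the same ordering, alignment and volatility.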
459 StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
460  IRBuilder<> Builder(SI);
461  auto *M = SI->getModule();
462  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
463  M->getDataLayout());
464  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
465 
466  Value *Addr = SI->getPointerOperand();
467  Type *PT = PointerType::get(NewTy,
468  Addr->getType()->getPointerAddressSpace());
469  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
470 
471  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
472  NewSI->setAlignment(SI->getAlignment());
473  NewSI->setVolatile(SI->isVolatile());
474  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
475  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
476  SI->eraseFromParent();
477  return NewSI;
478 }
479 
480 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
481  // This function is only called on atomic stores that are too large to be
 482 // atomic if implemented as a native store. So we replace them with an
 483 // atomic swap, which can be implemented for example as a ldrex/strex on ARM
484  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
485  // It is the responsibility of the target to only signal expansion via
 486 // shouldExpandAtomicStoreInIR in cases where this is required and possible.
487  IRBuilder<> Builder(SI);
488  AtomicRMWInst *AI =
 489  Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
490  SI->getValueOperand(), SI->getOrdering());
491  SI->eraseFromParent();
492 
493  // Now we have an appropriate swap instruction, lower it as usual.
494  return tryExpandAtomicRMW(AI);
495 }
496 
497 static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
498  Value *Loaded, Value *NewVal,
499  AtomicOrdering MemOpOrder,
500  Value *&Success, Value *&NewLoaded) {
501  Type *OrigTy = NewVal->getType();
502 
503  // This code can go away when cmpxchg supports FP types.
504  bool NeedBitcast = OrigTy->isFloatingPointTy();
505  if (NeedBitcast) {
506  IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
507  unsigned AS = Addr->getType()->getPointerAddressSpace();
508  Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
509  NewVal = Builder.CreateBitCast(NewVal, IntTy);
510  Loaded = Builder.CreateBitCast(Loaded, IntTy);
511  }
512 
513  Value* Pair = Builder.CreateAtomicCmpXchg(
514  Addr, Loaded, NewVal, MemOpOrder,
 515  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
516  Success = Builder.CreateExtractValue(Pair, 1, "success");
517  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
518 
519  if (NeedBitcast)
520  NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
521 }
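// For example (illustrative): when this helper is used while expanding a float
// atomicrmw fadd, the address is reinterpreted as i32*, and the expected and
// new values are bitcast to i32 before the cmpxchg, since cmpxchg does not
// accept floating-point operands; the loaded result is bitcast back to float.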
522 
523 /// Emit IR to implement the given atomicrmw operation on values in registers,
524 /// returning the new value.
 525 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
526  Value *Loaded, Value *Inc) {
527  Value *NewVal;
528  switch (Op) {
529  case AtomicRMWInst::Xchg:
530  return Inc;
531  case AtomicRMWInst::Add:
532  return Builder.CreateAdd(Loaded, Inc, "new");
533  case AtomicRMWInst::Sub:
534  return Builder.CreateSub(Loaded, Inc, "new");
535  case AtomicRMWInst::And:
536  return Builder.CreateAnd(Loaded, Inc, "new");
537  case AtomicRMWInst::Nand:
538  return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
539  case AtomicRMWInst::Or:
540  return Builder.CreateOr(Loaded, Inc, "new");
541  case AtomicRMWInst::Xor:
542  return Builder.CreateXor(Loaded, Inc, "new");
543  case AtomicRMWInst::Max:
544  NewVal = Builder.CreateICmpSGT(Loaded, Inc);
545  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
546  case AtomicRMWInst::Min:
547  NewVal = Builder.CreateICmpSLE(Loaded, Inc);
548  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
549  case AtomicRMWInst::UMax:
550  NewVal = Builder.CreateICmpUGT(Loaded, Inc);
551  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
552  case AtomicRMWInst::UMin:
553  NewVal = Builder.CreateICmpULE(Loaded, Inc);
554  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
555  case AtomicRMWInst::FAdd:
556  return Builder.CreateFAdd(Loaded, Inc, "new");
557  case AtomicRMWInst::FSub:
558  return Builder.CreateFSub(Loaded, Inc, "new");
559  default:
560  llvm_unreachable("Unknown atomic op");
561  }
562 }
563 
564 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
565  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
 566  case TargetLoweringBase::AtomicExpansionKind::None:
567  return false;
 568  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
569  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
570  unsigned ValueSize = getAtomicOpSize(AI);
571  if (ValueSize < MinCASSize) {
 572  llvm_unreachable(
573  "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
574  } else {
575  auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
576  return performAtomicOp(AI->getOperation(), Builder, Loaded,
577  AI->getValOperand());
578  };
579  expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
580  AI->getOrdering(), PerformOp);
581  }
582  return true;
583  }
 584  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
585  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
586  unsigned ValueSize = getAtomicOpSize(AI);
587  if (ValueSize < MinCASSize) {
588  // TODO: Handle atomicrmw fadd/fsub
589  if (AI->getType()->isFloatingPointTy())
590  return false;
591 
592  expandPartwordAtomicRMW(AI,
 593  TargetLoweringBase::AtomicExpansionKind::CmpXChg);
594  } else {
 595  expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
596  }
597  return true;
598  }
 599  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
600  expandAtomicRMWToMaskedIntrinsic(AI);
601  return true;
602  }
603  default:
604  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
605  }
606 }
607 
608 namespace {
609 
610 /// Result values from createMaskInstrs helper.
611 struct PartwordMaskValues {
612  Type *WordType;
613  Type *ValueType;
614  Value *AlignedAddr;
615  Value *ShiftAmt;
616  Value *Mask;
617  Value *Inv_Mask;
618 };
619 
620 } // end anonymous namespace
621 
622 /// This is a helper function which builds instructions to provide
623 /// values necessary for partword atomic operations. It takes an
624 /// incoming address, Addr, and ValueType, and constructs the address,
625 /// shift-amounts and masks needed to work with a larger value of size
626 /// WordSize.
627 ///
628 /// AlignedAddr: Addr rounded down to a multiple of WordSize
629 ///
630 /// ShiftAmt: Number of bits to right-shift a WordSize value loaded
 631 /// from AlignedAddr for it to have the same value as if
632 /// ValueType was loaded from Addr.
633 ///
 634 /// Mask: Value to mask with the value loaded from AlignedAddr to
635 /// include only the part that would've been loaded from Addr.
636 ///
637 /// Inv_Mask: The inverse of Mask.
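///
/// Worked example (illustrative): with WordSize = 4, an i8 ValueType, and an
/// Addr whose low two bits are set, AlignedAddr clears those bits; on a
/// little-endian target ShiftAmt is 24 and Mask is 0xFF000000, while on a
/// big-endian target ShiftAmt is 0 and Mask is 0xFF.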
638 static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
639  Type *ValueType, Value *Addr,
640  unsigned WordSize) {
641  PartwordMaskValues Ret;
642 
643  BasicBlock *BB = I->getParent();
644  Function *F = BB->getParent();
645  Module *M = I->getModule();
646 
647  LLVMContext &Ctx = F->getContext();
648  const DataLayout &DL = M->getDataLayout();
649 
650  unsigned ValueSize = DL.getTypeStoreSize(ValueType);
651 
652  assert(ValueSize < WordSize);
653 
654  Ret.ValueType = ValueType;
655  Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
656 
657  Type *WordPtrType =
658  Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
659 
660  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
661  Ret.AlignedAddr = Builder.CreateIntToPtr(
662  Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
663  "AlignedAddr");
664 
665  Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
666  if (DL.isLittleEndian()) {
667  // turn bytes into bits
668  Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
669  } else {
670  // turn bytes into bits, and count from the other side.
671  Ret.ShiftAmt =
672  Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
673  }
674 
675  Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
676  Ret.Mask = Builder.CreateShl(
677  ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
678  "Mask");
679  Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
680 
681  return Ret;
682 }
683 
684 /// Emit IR to implement a masked version of a given atomicrmw
685 /// operation. (That is, only the bits under the Mask should be
686 /// affected by the operation)
 687 static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
688  IRBuilder<> &Builder, Value *Loaded,
689  Value *Shifted_Inc, Value *Inc,
690  const PartwordMaskValues &PMV) {
691  // TODO: update to use
692  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
693  // to merge bits from two values without requiring PMV.Inv_Mask.
694  switch (Op) {
695  case AtomicRMWInst::Xchg: {
696  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
697  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
698  return FinalVal;
699  }
700  case AtomicRMWInst::Or:
701  case AtomicRMWInst::Xor:
702  case AtomicRMWInst::And:
703  llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
704  case AtomicRMWInst::Add:
705  case AtomicRMWInst::Sub:
706  case AtomicRMWInst::Nand: {
707  // The other arithmetic ops need to be masked into place.
708  Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
709  Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
710  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
711  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
712  return FinalVal;
713  }
714  case AtomicRMWInst::Max:
715  case AtomicRMWInst::Min:
716  case AtomicRMWInst::UMax:
717  case AtomicRMWInst::UMin: {
718  // Finally, comparison ops will operate on the full value, so
719  // truncate down to the original size, and expand out again after
720  // doing the operation.
721  Value *Loaded_Shiftdown = Builder.CreateTrunc(
722  Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
723  Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
724  Value *NewVal_Shiftup = Builder.CreateShl(
725  Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
726  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
727  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
728  return FinalVal;
729  }
730  default:
731  llvm_unreachable("Unknown atomic op");
732  }
733 }
734 
735 /// Expand a sub-word atomicrmw operation into an appropriate
736 /// word-sized operation.
737 ///
738 /// It will create an LL/SC or cmpxchg loop, as appropriate, the same
739 /// way as a typical atomicrmw expansion. The only difference here is
740 /// that the operation inside of the loop must operate only upon a
741 /// part of the value.
742 void AtomicExpand::expandPartwordAtomicRMW(
 743  AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
 744  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
745 
746  AtomicOrdering MemOpOrder = AI->getOrdering();
747 
748  IRBuilder<> Builder(AI);
749 
750  PartwordMaskValues PMV =
751  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
752  TLI->getMinCmpXchgSizeInBits() / 8);
753 
754  Value *ValOperand_Shifted =
755  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
756  PMV.ShiftAmt, "ValOperand_Shifted");
757 
758  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
759  return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
760  ValOperand_Shifted, AI->getValOperand(), PMV);
761  };
762 
763  // TODO: When we're ready to support LLSC conversions too, use
764  // insertRMWLLSCLoop here for ExpansionKind==LLSC.
765  Value *OldResult =
766  insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
767  PerformPartwordOp, createCmpXchgInstFun);
768  Value *FinalOldResult = Builder.CreateTrunc(
769  Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
770  AI->replaceAllUsesWith(FinalOldResult);
771  AI->eraseFromParent();
772 }
773 
774 // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
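// For instance (illustrative): an i8 "atomicrmw and" on a target whose minimum
// cmpxchg width is 32 bits becomes an i32 "atomicrmw and" on the containing
// word, with the shifted operand OR'd with Inv_Mask so the bytes outside the
// i8 are preserved; or/xor simply use the zero-extended, shifted operand,
// since zero bits leave the remaining bytes unchanged.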
775 AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
776  IRBuilder<> Builder(AI);
 777  AtomicRMWInst::BinOp Op = AI->getOperation();
778 
779  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
780  Op == AtomicRMWInst::And) &&
781  "Unable to widen operation");
782 
783  PartwordMaskValues PMV =
784  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
785  TLI->getMinCmpXchgSizeInBits() / 8);
786 
787  Value *ValOperand_Shifted =
788  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
789  PMV.ShiftAmt, "ValOperand_Shifted");
790 
791  Value *NewOperand;
792 
793  if (Op == AtomicRMWInst::And)
794  NewOperand =
795  Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
796  else
797  NewOperand = ValOperand_Shifted;
798 
799  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
800  NewOperand, AI->getOrdering());
801 
802  Value *FinalOldResult = Builder.CreateTrunc(
803  Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
804  AI->replaceAllUsesWith(FinalOldResult);
805  AI->eraseFromParent();
806  return NewAI;
807 }
808 
809 void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
810  // The basic idea here is that we're expanding a cmpxchg of a
811  // smaller memory size up to a word-sized cmpxchg. To do this, we
812  // need to add a retry-loop for strong cmpxchg, so that
813  // modifications to other parts of the word don't cause a spurious
814  // failure.
815 
816  // This generates code like the following:
817  // [[Setup mask values PMV.*]]
818  // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
819  // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
820  // %InitLoaded = load i32* %addr
821  // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
822  // br partword.cmpxchg.loop
823  // partword.cmpxchg.loop:
824  // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
825  // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
826  // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
827  // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
828  // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
829  // i32 %FullWord_NewVal success_ordering failure_ordering
830  // %OldVal = extractvalue { i32, i1 } %NewCI, 0
831  // %Success = extractvalue { i32, i1 } %NewCI, 1
832  // br i1 %Success, label %partword.cmpxchg.end,
833  // label %partword.cmpxchg.failure
834  // partword.cmpxchg.failure:
835  // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
836  // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
837  // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
838  // label %partword.cmpxchg.end
839  // partword.cmpxchg.end:
840  // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
841  // %FinalOldVal = trunc i32 %tmp1 to i8
842  // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
843  // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
844 
845  Value *Addr = CI->getPointerOperand();
846  Value *Cmp = CI->getCompareOperand();
847  Value *NewVal = CI->getNewValOperand();
848 
849  BasicBlock *BB = CI->getParent();
850  Function *F = BB->getParent();
851  IRBuilder<> Builder(CI);
852  LLVMContext &Ctx = Builder.getContext();
853 
854  const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;
855 
856  BasicBlock *EndBB =
857  BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
858  auto FailureBB =
859  BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
860  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
861 
862  // The split call above "helpfully" added a branch at the end of BB
863  // (to the wrong place).
864  std::prev(BB->end())->eraseFromParent();
865  Builder.SetInsertPoint(BB);
866 
867  PartwordMaskValues PMV = createMaskInstrs(
868  Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);
869 
870  // Shift the incoming values over, into the right location in the word.
871  Value *NewVal_Shifted =
872  Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
873  Value *Cmp_Shifted =
874  Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
875 
876  // Load the entire current word, and mask into place the expected and new
877  // values
878  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
879  InitLoaded->setVolatile(CI->isVolatile());
880  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
881  Builder.CreateBr(LoopBB);
882 
883  // partword.cmpxchg.loop:
884  Builder.SetInsertPoint(LoopBB);
885  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
886  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
887 
888  // Mask/Or the expected and new values into place in the loaded word.
889  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
890  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
891  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
892  PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
893  CI->getFailureOrdering(), CI->getSyncScopeID());
894  NewCI->setVolatile(CI->isVolatile());
895  // When we're building a strong cmpxchg, we need a loop, so you
896  // might think we could use a weak cmpxchg inside. But, using strong
897  // allows the below comparison for ShouldContinue, and we're
898  // expecting the underlying cmpxchg to be a machine instruction,
899  // which is strong anyways.
900  NewCI->setWeak(CI->isWeak());
901 
902  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
903  Value *Success = Builder.CreateExtractValue(NewCI, 1);
904 
905  if (CI->isWeak())
906  Builder.CreateBr(EndBB);
907  else
908  Builder.CreateCondBr(Success, EndBB, FailureBB);
909 
910  // partword.cmpxchg.failure:
911  Builder.SetInsertPoint(FailureBB);
 912  // Upon failure, check whether the masked-out part of the loaded value
 913  // has been modified. If it hasn't, the comparison must have failed on the
 914  // masked-in part, so give up rather than retry.
915  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
916  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
917  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
918 
919  // Add the second value to the phi from above
920  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
921 
922  // partword.cmpxchg.end:
923  Builder.SetInsertPoint(CI);
924 
925  Value *FinalOldVal = Builder.CreateTrunc(
926  Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
927  Value *Res = UndefValue::get(CI->getType());
928  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
929  Res = Builder.CreateInsertValue(Res, Success, 1);
930 
931  CI->replaceAllUsesWith(Res);
932  CI->eraseFromParent();
933 }
934 
935 void AtomicExpand::expandAtomicOpToLLSC(
936  Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
937  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
938  IRBuilder<> Builder(I);
939  Value *Loaded =
940  insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);
941 
942  I->replaceAllUsesWith(Loaded);
943  I->eraseFromParent();
944 }
945 
946 void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
947  IRBuilder<> Builder(AI);
948 
949  PartwordMaskValues PMV =
950  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
951  TLI->getMinCmpXchgSizeInBits() / 8);
952 
953  // The value operand must be sign-extended for signed min/max so that the
954  // target's signed comparison instructions can be used. Otherwise, just
955  // zero-ext.
956  Instruction::CastOps CastOp = Instruction::ZExt;
957  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
958  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
959  CastOp = Instruction::SExt;
960 
961  Value *ValOperand_Shifted = Builder.CreateShl(
962  Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
963  PMV.ShiftAmt, "ValOperand_Shifted");
964  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
965  Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
966  AI->getOrdering());
967  Value *FinalOldResult = Builder.CreateTrunc(
968  Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
969  AI->replaceAllUsesWith(FinalOldResult);
970  AI->eraseFromParent();
971 }
972 
973 void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
974  IRBuilder<> Builder(CI);
975 
976  PartwordMaskValues PMV = createMaskInstrs(
977  Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
978  TLI->getMinCmpXchgSizeInBits() / 8);
979 
980  Value *CmpVal_Shifted = Builder.CreateShl(
981  Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
982  "CmpVal_Shifted");
983  Value *NewVal_Shifted = Builder.CreateShl(
984  Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
985  "NewVal_Shifted");
986  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
987  Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
988  CI->getSuccessOrdering());
989  Value *FinalOldVal = Builder.CreateTrunc(
990  Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
991 
992  Value *Res = UndefValue::get(CI->getType());
993  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
994  Value *Success = Builder.CreateICmpEQ(
995  CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
996  Res = Builder.CreateInsertValue(Res, Success, 1);
997 
998  CI->replaceAllUsesWith(Res);
999  CI->eraseFromParent();
1000 }
1001 
1002 Value *AtomicExpand::insertRMWLLSCLoop(
1003  IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
1004  AtomicOrdering MemOpOrder,
1005  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
1006  LLVMContext &Ctx = Builder.getContext();
1007  BasicBlock *BB = Builder.GetInsertBlock();
1008  Function *F = BB->getParent();
1009 
1010  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1011  //
1012  // The standard expansion we produce is:
1013  // [...]
1014  // atomicrmw.start:
1015  // %loaded = @load.linked(%addr)
1016  // %new = some_op iN %loaded, %incr
1017  // %stored = @store_conditional(%new, %addr)
1018  // %try_again = icmp i32 ne %stored, 0
1019  // br i1 %try_again, label %loop, label %atomicrmw.end
1020  // atomicrmw.end:
1021  // [...]
1022  BasicBlock *ExitBB =
1023  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1024  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1025 
1026  // The split call above "helpfully" added a branch at the end of BB (to the
1027  // wrong place).
1028  std::prev(BB->end())->eraseFromParent();
1029  Builder.SetInsertPoint(BB);
1030  Builder.CreateBr(LoopBB);
1031 
1032  // Start the main loop block now that we've taken care of the preliminaries.
1033  Builder.SetInsertPoint(LoopBB);
1034  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1035 
1036  Value *NewVal = PerformOp(Builder, Loaded);
1037 
1038  Value *StoreSuccess =
1039  TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1040  Value *TryAgain = Builder.CreateICmpNE(
1041  StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1042  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1043 
1044  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1045  return Loaded;
1046 }
1047 
1048 /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1049 /// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1050 /// IR. As a migration step, we convert back to what use to be the standard
1051 /// way to represent a pointer cmpxchg so that we can update backends one by
1052 /// one.
1053 AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1054  auto *M = CI->getModule();
1055  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1056  M->getDataLayout());
1057 
1058  IRBuilder<> Builder(CI);
1059 
1060  Value *Addr = CI->getPointerOperand();
1061  Type *PT = PointerType::get(NewTy,
1062  Addr->getType()->getPointerAddressSpace());
1063  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
1064 
1065  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1066  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1067 
1068 
1069  auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
1070  CI->getSuccessOrdering(),
1071  CI->getFailureOrdering(),
1072  CI->getSyncScopeID());
1073  NewCI->setVolatile(CI->isVolatile());
1074  NewCI->setWeak(CI->isWeak());
1075  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1076 
1077  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1078  Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1079 
1080  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1081 
1082  Value *Res = UndefValue::get(CI->getType());
1083  Res = Builder.CreateInsertValue(Res, OldVal, 0);
1084  Res = Builder.CreateInsertValue(Res, Succ, 1);
1085 
1086  CI->replaceAllUsesWith(Res);
1087  CI->eraseFromParent();
1088  return NewCI;
1089 }
1090 
1091 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1092  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1093  AtomicOrdering FailureOrder = CI->getFailureOrdering();
1094  Value *Addr = CI->getPointerOperand();
1095  BasicBlock *BB = CI->getParent();
1096  Function *F = BB->getParent();
1097  LLVMContext &Ctx = F->getContext();
1098  // If shouldInsertFencesForAtomic() returns true, then the target does not
1099  // want to deal with memory orders, and emitLeading/TrailingFence should take
 1100  // care of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
1101  // should preserve the ordering.
1102  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1103  AtomicOrdering MemOpOrder =
1104  ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
1105 
1106  // In implementations which use a barrier to achieve release semantics, we can
1107  // delay emitting this barrier until we know a store is actually going to be
1108  // attempted. The cost of this delay is that we need 2 copies of the block
1109  // emitting the load-linked, affecting code size.
1110  //
1111  // Ideally, this logic would be unconditional except for the minsize check
1112  // since in other cases the extra blocks naturally collapse down to the
1113  // minimal loop. Unfortunately, this puts too much stress on later
1114  // optimisations so we avoid emitting the extra logic in those cases too.
1115  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1116  SuccessOrder != AtomicOrdering::Monotonic &&
1117  SuccessOrder != AtomicOrdering::Acquire &&
1118  !F->hasMinSize();
1119 
1120  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1121  // do it even on minsize.
1122  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1123 
1124  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1125  //
1126  // The full expansion we produce is:
1127  // [...]
1128  // cmpxchg.start:
1129  // %unreleasedload = @load.linked(%addr)
1130  // %should_store = icmp eq %unreleasedload, %desired
1131  // br i1 %should_store, label %cmpxchg.fencedstore,
1132  // label %cmpxchg.nostore
 1133  // cmpxchg.fencedstore:
1134  // fence?
1135  // br label cmpxchg.trystore
1136  // cmpxchg.trystore:
 1137  // %loaded.trystore = phi [%unreleasedload, %cmpxchg.fencedstore],
1138  // [%releasedload, %cmpxchg.releasedload]
1139  // %stored = @store_conditional(%new, %addr)
1140  // %success = icmp eq i32 %stored, 0
1141  // br i1 %success, label %cmpxchg.success,
1142  // label %cmpxchg.releasedload/%cmpxchg.failure
1143  // cmpxchg.releasedload:
1144  // %releasedload = @load.linked(%addr)
1145  // %should_store = icmp eq %releasedload, %desired
1146  // br i1 %should_store, label %cmpxchg.trystore,
1147  // label %cmpxchg.failure
1148  // cmpxchg.success:
1149  // fence?
1150  // br label %cmpxchg.end
1151  // cmpxchg.nostore:
1152  // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1153  // [%releasedload,
1154  // %cmpxchg.releasedload/%cmpxchg.trystore]
1155  // @load_linked_fail_balance()?
1156  // br label %cmpxchg.failure
1157  // cmpxchg.failure:
1158  // fence?
1159  // br label %cmpxchg.end
1160  // cmpxchg.end:
1161  // %loaded = phi [%loaded.nostore, %cmpxchg.failure],
1162  // [%loaded.trystore, %cmpxchg.trystore]
1163  // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1164  // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1165  // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1166  // [...]
1167  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1168  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1169  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1170  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1171  auto ReleasedLoadBB =
1172  BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1173  auto TryStoreBB =
1174  BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1175  auto ReleasingStoreBB =
1176  BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1177  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1178 
1179  // This grabs the DebugLoc from CI
1180  IRBuilder<> Builder(CI);
1181 
1182  // The split call above "helpfully" added a branch at the end of BB (to the
1183  // wrong place), but we might want a fence too. It's easiest to just remove
1184  // the branch entirely.
1185  std::prev(BB->end())->eraseFromParent();
1186  Builder.SetInsertPoint(BB);
1187  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1188  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1189  Builder.CreateBr(StartBB);
1190 
1191  // Start the main loop block now that we've taken care of the preliminaries.
1192  Builder.SetInsertPoint(StartBB);
1193  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1194  Value *ShouldStore = Builder.CreateICmpEQ(
1195  UnreleasedLoad, CI->getCompareOperand(), "should_store");
1196 
1197  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1198  // jump straight past that fence instruction (if it exists).
1199  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1200 
1201  Builder.SetInsertPoint(ReleasingStoreBB);
1202  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1203  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1204  Builder.CreateBr(TryStoreBB);
1205 
1206  Builder.SetInsertPoint(TryStoreBB);
1207  Value *StoreSuccess = TLI->emitStoreConditional(
1208  Builder, CI->getNewValOperand(), Addr, MemOpOrder);
1209  StoreSuccess = Builder.CreateICmpEQ(
1210  StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1211  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1212  Builder.CreateCondBr(StoreSuccess, SuccessBB,
1213  CI->isWeak() ? FailureBB : RetryBB);
1214 
1215  Builder.SetInsertPoint(ReleasedLoadBB);
1216  Value *SecondLoad;
1217  if (HasReleasedLoadBB) {
1218  SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1219  ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
1220  "should_store");
1221 
1222  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1223  // jump straight past that fence instruction (if it exists).
1224  Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1225  } else
1226  Builder.CreateUnreachable();
1227 
1228  // Make sure later instructions don't get reordered with a fence if
1229  // necessary.
1230  Builder.SetInsertPoint(SuccessBB);
1231  if (ShouldInsertFencesForAtomic)
1232  TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1233  Builder.CreateBr(ExitBB);
1234 
1235  Builder.SetInsertPoint(NoStoreBB);
1236  // In the failing case, where we don't execute the store-conditional, the
1237  // target might want to balance out the load-linked with a dedicated
1238  // instruction (e.g., on ARM, clearing the exclusive monitor).
1239  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1240  Builder.CreateBr(FailureBB);
1241 
1242  Builder.SetInsertPoint(FailureBB);
1243  if (ShouldInsertFencesForAtomic)
1244  TLI->emitTrailingFence(Builder, CI, FailureOrder);
1245  Builder.CreateBr(ExitBB);
1246 
1247  // Finally, we have control-flow based knowledge of whether the cmpxchg
1248  // succeeded or not. We expose this to later passes by converting any
1249  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1250  // PHI.
1251  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1252  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
1253  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1254  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1255 
1256  // Setup the builder so we can create any PHIs we need.
1257  Value *Loaded;
1258  if (!HasReleasedLoadBB)
1259  Loaded = UnreleasedLoad;
1260  else {
1261  Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
1262  PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1263  TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1264  TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1265 
1266  Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
1267  PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1268  NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
1269  NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1270 
1271  Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
1272  PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1273  ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
1274  ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
1275 
1276  Loaded = ExitLoaded;
1277  }
1278 
1279  // Look for any users of the cmpxchg that are just comparing the loaded value
1280  // against the desired one, and replace them with the CFG-derived version.
 1281  SmallVector<ExtractValueInst *, 2> PrunedInsts;
1282  for (auto User : CI->users()) {
 1283  ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1284  if (!EV)
1285  continue;
1286 
1287  assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1288  "weird extraction from { iN, i1 }");
1289 
1290  if (EV->getIndices()[0] == 0)
1291  EV->replaceAllUsesWith(Loaded);
1292  else
1293  EV->replaceAllUsesWith(Success);
1294 
1295  PrunedInsts.push_back(EV);
1296  }
1297 
1298  // We can remove the instructions now we're no longer iterating through them.
1299  for (auto EV : PrunedInsts)
1300  EV->eraseFromParent();
1301 
1302  if (!CI->use_empty()) {
1303  // Some use of the full struct return that we don't understand has happened,
1304  // so we've got to reconstruct it properly.
1305  Value *Res;
1306  Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
1307  Res = Builder.CreateInsertValue(Res, Success, 1);
1308 
1309  CI->replaceAllUsesWith(Res);
1310  }
1311 
1312  CI->eraseFromParent();
1313  return true;
1314 }
1315 
1316 bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
1317  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1318  if(!C)
1319  return false;
1320 
 1321  AtomicRMWInst::BinOp Op = RMWI->getOperation();
1322  switch(Op) {
1323  case AtomicRMWInst::Add:
1324  case AtomicRMWInst::Sub:
1325  case AtomicRMWInst::Or:
1326  case AtomicRMWInst::Xor:
1327  return C->isZero();
1328  case AtomicRMWInst::And:
1329  return C->isMinusOne();
1330  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1331  default:
1332  return false;
1333  }
1334 }
1335 
1336 bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
1337  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1338  tryExpandAtomicLoad(ResultingLoad);
1339  return true;
1340  }
1341  return false;
1342 }
1343 
1344 Value *AtomicExpand::insertRMWCmpXchgLoop(
1345  IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
1346  AtomicOrdering MemOpOrder,
1347  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
1348  CreateCmpXchgInstFun CreateCmpXchg) {
1349  LLVMContext &Ctx = Builder.getContext();
1350  BasicBlock *BB = Builder.GetInsertBlock();
1351  Function *F = BB->getParent();
1352 
1353  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1354  //
1355  // The standard expansion we produce is:
1356  // [...]
1357  // %init_loaded = load atomic iN* %addr
1358  // br label %loop
1359  // loop:
1360  // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1361  // %new = some_op iN %loaded, %incr
1362  // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1363  // %new_loaded = extractvalue { iN, i1 } %pair, 0
1364  // %success = extractvalue { iN, i1 } %pair, 1
1365  // br i1 %success, label %atomicrmw.end, label %loop
1366  // atomicrmw.end:
1367  // [...]
1368  BasicBlock *ExitBB =
1369  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1370  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1371 
1372  // The split call above "helpfully" added a branch at the end of BB (to the
1373  // wrong place), but we want a load. It's easiest to just remove
1374  // the branch entirely.
1375  std::prev(BB->end())->eraseFromParent();
1376  Builder.SetInsertPoint(BB);
1377  LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
1378  // Atomics require at least natural alignment.
1379  InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
1380  Builder.CreateBr(LoopBB);
1381 
1382  // Start the main loop block now that we've taken care of the preliminaries.
1383  Builder.SetInsertPoint(LoopBB);
1384  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1385  Loaded->addIncoming(InitLoaded, BB);
1386 
1387  Value *NewVal = PerformOp(Builder, Loaded);
1388 
1389  Value *NewLoaded = nullptr;
1390  Value *Success = nullptr;
1391 
1392  CreateCmpXchg(Builder, Addr, Loaded, NewVal,
1393  MemOpOrder == AtomicOrdering::Unordered
 1394  ? AtomicOrdering::Monotonic
1395  : MemOpOrder,
1396  Success, NewLoaded);
1397  assert(Success && NewLoaded);
1398 
1399  Loaded->addIncoming(NewLoaded, LoopBB);
1400 
1401  Builder.CreateCondBr(Success, ExitBB, LoopBB);
1402 
1403  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1404  return NewLoaded;
1405 }
1406 
1407 bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1408  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1409  unsigned ValueSize = getAtomicOpSize(CI);
1410 
1411  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1412  default:
1413  llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
 1414  case TargetLoweringBase::AtomicExpansionKind::None:
1415  if (ValueSize < MinCASSize)
1416  expandPartwordCmpXchg(CI);
1417  return false;
 1418  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1419  assert(ValueSize >= MinCASSize &&
1420  "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
1421  return expandAtomicCmpXchg(CI);
1422  }
 1423  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1424  expandAtomicCmpXchgToMaskedIntrinsic(CI);
1425  return true;
1426  }
1427 }
1428 
1429 // Note: This function is exposed externally by AtomicExpandUtils.h
 1430 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1431  CreateCmpXchgInstFun CreateCmpXchg) {
1432  IRBuilder<> Builder(AI);
1433  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
1434  Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
1435  [&](IRBuilder<> &Builder, Value *Loaded) {
1436  return performAtomicOp(AI->getOperation(), Builder, Loaded,
1437  AI->getValOperand());
1438  },
1439  CreateCmpXchg);
1440 
1441  AI->replaceAllUsesWith(Loaded);
1442  AI->eraseFromParent();
1443  return true;
1444 }
1445 
1446 // In order to use one of the sized library calls such as
1447 // __atomic_fetch_add_4, the alignment must be sufficient, the size
1448 // must be one of the potentially-specialized sizes, and the value
1449 // type must actually exist in C on the target (otherwise, the
1450 // function wouldn't actually be defined.)
1451 static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
1452  const DataLayout &DL) {
1453  // TODO: "LargestSize" is an approximation for "largest type that
1454  // you can express in C". It seems to be the case that int128 is
1455 // supported on all 64-bit platforms; otherwise, only up to 64-bit
1456  // integers are supported. If we get this wrong, then we'll try to
1457  // call a sized libcall that doesn't actually exist. There should
1458  // really be some more reliable way in LLVM of determining integer
1459  // sizes which are valid in the target's C ABI...
1460  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1461  return Align >= Size &&
1462  (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1463  Size <= LargestSize;
1464 }
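// Hypothetical examples (a sketch, assuming a 64-bit target where
// getLargestLegalIntTypeSizeInBits() >= 64, so LargestSize == 16):
//   canUseSizedAtomicCall(4, 4, DL)   -> true  (__atomic_*_4 is usable)
//   canUseSizedAtomicCall(8, 4, DL)   -> false (under-aligned)
//   canUseSizedAtomicCall(3, 4, DL)   -> false (3 is not a specialized size)
//   canUseSizedAtomicCall(16, 16, DL) -> true  (int128 assumed available)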
1465 
1466 void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
1467  static const RTLIB::Libcall Libcalls[6] = {
1468  RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1469  RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1470  unsigned Size = getAtomicOpSize(I);
1471  unsigned Align = getAtomicOpAlign(I);
1472 
1473  bool expanded = expandAtomicOpToLibcall(
1474  I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
1475  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1476  (void)expanded;
1477  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
1478 }
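// Hypothetical example (a sketch of the sized-libcall path, assuming
// sufficient alignment):
//   %v = load atomic double, double* %p seq_cst, align 8
// becomes, via expandAtomicOpToLibcall,
//   %r = call i64 @__atomic_load_8(i8* %p.i8, i32 5)  ; 5 == seq_cst in the C ABI
//   %v = bitcast i64 %r to double
// since the sized __atomic_* entry points traffic in integers and the value
// is bitcast on the way in and out.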
1479 
1480 void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
1481  static const RTLIB::Libcall Libcalls[6] = {
1482  RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1483  RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1484  unsigned Size = getAtomicOpSize(I);
1485  unsigned Align = getAtomicOpAlign(I);
1486 
1487  bool expanded = expandAtomicOpToLibcall(
1488  I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
1489  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1490  (void)expanded;
1491  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
1492 }
1493 
1494 void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1495  static const RTLIB::Libcall Libcalls[6] = {
1496  RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1497  RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1498  RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1499  unsigned Size = getAtomicOpSize(I);
1500  unsigned Align = getAtomicOpAlign(I);
1501 
1502  bool expanded = expandAtomicOpToLibcall(
1503  I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
1504  I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1505  Libcalls);
1506  (void)expanded;
1507  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
1508 }
1509 
1510 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1511  static const RTLIB::Libcall LibcallsXchg[6] = {
1512  RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1513  RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1514  RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1515  static const RTLIB::Libcall LibcallsAdd[6] = {
1516  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1517  RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1518  RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1519  static const RTLIB::Libcall LibcallsSub[6] = {
1520  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1521  RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1522  RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1523  static const RTLIB::Libcall LibcallsAnd[6] = {
1524  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1525  RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1526  RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1527  static const RTLIB::Libcall LibcallsOr[6] = {
1528  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1529  RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1530  RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1531  static const RTLIB::Libcall LibcallsXor[6] = {
1532  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1533  RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1534  RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1535  static const RTLIB::Libcall LibcallsNand[6] = {
1536  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1537  RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1538  RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1539 
1540  switch (Op) {
1541  case AtomicRMWInst::BAD_BINOP:
1542  llvm_unreachable("Should not have BAD_BINOP.");
1543  case AtomicRMWInst::Xchg:
1544  return makeArrayRef(LibcallsXchg);
1545  case AtomicRMWInst::Add:
1546  return makeArrayRef(LibcallsAdd);
1547  case AtomicRMWInst::Sub:
1548  return makeArrayRef(LibcallsSub);
1549  case AtomicRMWInst::And:
1550  return makeArrayRef(LibcallsAnd);
1551  case AtomicRMWInst::Or:
1552  return makeArrayRef(LibcallsOr);
1553  case AtomicRMWInst::Xor:
1554  return makeArrayRef(LibcallsXor);
1555  case AtomicRMWInst::Nand:
1556  return makeArrayRef(LibcallsNand);
1557  case AtomicRMWInst::Max:
1558  case AtomicRMWInst::Min:
1559  case AtomicRMWInst::UMax:
1560  case AtomicRMWInst::UMin:
1561  case AtomicRMWInst::FAdd:
1562  case AtomicRMWInst::FSub:
1563  // No atomic libcalls are available for max/min/umax/umin or for the
1564  // floating-point fadd/fsub operations.
1564  return {};
1565  }
1566  llvm_unreachable("Unexpected AtomicRMW operation.");
1567 }
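// Note on the table layout (a descriptive sketch): expandAtomicOpToLibcall
// expects index 0 to hold the generic (size_t-based) libcall and indices 1..5
// to hold the sized _1/_2/_4/_8/_16 variants. The fetch_* tables have no
// generic entry point, so index 0 is UNKNOWN_LIBCALL and callers that cannot
// use a sized call fall back to the CAS-loop path in expandAtomicRMWToLibcall
// below.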
1568 
1569 void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1570  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1571 
1572  unsigned Size = getAtomicOpSize(I);
1573  unsigned Align = getAtomicOpAlign(I);
1574 
1575  bool Success = false;
1576  if (!Libcalls.empty())
1577  Success = expandAtomicOpToLibcall(
1578  I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
1579  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1580 
1581  // The expansion failed: either there were no libcalls at all for
1582  // the operation (min/max), or there were only size-specialized
1583  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1584  // CAS libcall, via a CAS loop, instead.
1585  if (!Success) {
1586  expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
1587  Value *Loaded, Value *NewVal,
1588  AtomicOrdering MemOpOrder,
1589  Value *&Success, Value *&NewLoaded) {
1590  // Create the CAS instruction normally...
1591  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1592  Addr, Loaded, NewVal, MemOpOrder,
1593  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
1594  Success = Builder.CreateExtractValue(Pair, 1, "success");
1595  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1596 
1597  // ...and then expand the CAS into a libcall.
1598  expandAtomicCASToLibcall(Pair);
1599  });
1600  }
1601 }
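// Rough illustration (a sketch): an `atomicrmw max` has no __atomic_fetch_*
// libcall at all, so it always takes the fallback above: a cmpxchg loop is
// built with performAtomicOp computing the max, and each loop iteration's
// cmpxchg is itself lowered to a __atomic_compare_exchange* call by
// expandAtomicCASToLibcall.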
1602 
1603 // A helper routine for the above expandAtomic*ToLibcall functions.
1604 //
1605 // 'Libcalls' contains an array of enum values for the particular
1606 // ATOMIC libcalls to be emitted. All of the other arguments besides
1607 // 'I' are extracted from the Instruction subclass by the
1608 // caller. Depending on the particular call, some will be null.
1609 bool AtomicExpand::expandAtomicOpToLibcall(
1610  Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
1611  Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1612  AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1613  assert(Libcalls.size() == 6);
1614 
1615  LLVMContext &Ctx = I->getContext();
1616  Module *M = I->getModule();
1617  const DataLayout &DL = M->getDataLayout();
1618  IRBuilder<> Builder(I);
1619  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1620 
1621  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
1622  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1623 
1624  unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);
1625 
1626  // TODO: the "order" argument type is "int", not int32. So
1627  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1628  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1629  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1630  Constant *OrderingVal =
1631  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1632  Constant *Ordering2Val = nullptr;
1633  if (CASExpected) {
1634  assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1635  Ordering2Val =
1636  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1637  }
1638  bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1639 
1640  RTLIB::Libcall RTLibType;
1641  if (UseSizedLibcall) {
1642  switch (Size) {
1643  case 1: RTLibType = Libcalls[1]; break;
1644  case 2: RTLibType = Libcalls[2]; break;
1645  case 4: RTLibType = Libcalls[3]; break;
1646  case 8: RTLibType = Libcalls[4]; break;
1647  case 16: RTLibType = Libcalls[5]; break;
1648  }
1649  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1650  RTLibType = Libcalls[0];
1651  } else {
1652  // Can't use sized function, and there's no generic for this
1653  // operation, so give up.
1654  return false;
1655  }
1656 
1657 // Build up the function call. There are two kinds. First, the sized
1658  // variants. These calls are going to be one of the following (with
1659  // N=1,2,4,8,16):
1660  // iN __atomic_load_N(iN *ptr, int ordering)
1661  // void __atomic_store_N(iN *ptr, iN val, int ordering)
1662  // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1663  // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1664  // int success_order, int failure_order)
1665  //
1666  // Note that these functions can be used for non-integer atomic
1667 // operations; the values just need to be bitcast to integers on the
1668  // way in and out.
1669  //
1670  // And, then, the generic variants. They look like the following:
1671  // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1672  // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1673  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1674  // int ordering)
1675  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1676  // void *desired, int success_order,
1677  // int failure_order)
1678  //
1679  // The different signatures are built up depending on the
1680  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1681  // variables.
1682 
1683  AllocaInst *AllocaCASExpected = nullptr;
1684  Value *AllocaCASExpected_i8 = nullptr;
1685  AllocaInst *AllocaValue = nullptr;
1686  Value *AllocaValue_i8 = nullptr;
1687  AllocaInst *AllocaResult = nullptr;
1688  Value *AllocaResult_i8 = nullptr;
1689 
1690  Type *ResultTy;
1691  SmallVector<Value *, 6> Args;
1692  AttributeList Attr;
1693 
1694  // 'size' argument.
1695  if (!UseSizedLibcall) {
1696  // Note, getIntPtrType is assumed equivalent to size_t.
1697  Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1698  }
1699 
1700  // 'ptr' argument.
1701  // note: This assumes all address spaces share a common libfunc
1702 // implementation and that addresses are convertible. For systems without
1703  // that property, we'd need to extend this mechanism to support AS-specific
1704  // families of atomic intrinsics.
1705  auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
1706  Value *PtrVal = Builder.CreateBitCast(PointerOperand,
1707  Type::getInt8PtrTy(Ctx, PtrTypeAS));
1708  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
1709  Args.push_back(PtrVal);
1710 
1711  // 'expected' argument, if present.
1712  if (CASExpected) {
1713  AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1714  AllocaCASExpected->setAlignment(AllocaAlignment);
1715  unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
1716 
1717  AllocaCASExpected_i8 =
1718  Builder.CreateBitCast(AllocaCASExpected,
1719  Type::getInt8PtrTy(Ctx, AllocaAS));
1720  Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
1721  Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1722  Args.push_back(AllocaCASExpected_i8);
1723  }
1724 
1725  // 'val' argument ('desired' for cas), if present.
1726  if (ValueOperand) {
1727  if (UseSizedLibcall) {
1728  Value *IntValue =
1729  Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1730  Args.push_back(IntValue);
1731  } else {
1732  AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1733  AllocaValue->setAlignment(AllocaAlignment);
1734  AllocaValue_i8 =
1735  Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
1736  Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
1737  Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1738  Args.push_back(AllocaValue_i8);
1739  }
1740  }
1741 
1742  // 'ret' argument.
1743  if (!CASExpected && HasResult && !UseSizedLibcall) {
1744  AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1745  AllocaResult->setAlignment(AllocaAlignment);
1746  unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
1747  AllocaResult_i8 =
1748  Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
1749  Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
1750  Args.push_back(AllocaResult_i8);
1751  }
1752 
1753  // 'ordering' ('success_order' for cas) argument.
1754  Args.push_back(OrderingVal);
1755 
1756  // 'failure_order' argument, if present.
1757  if (Ordering2Val)
1758  Args.push_back(Ordering2Val);
1759 
1760  // Now, the return type.
1761  if (CASExpected) {
1762  ResultTy = Type::getInt1Ty(Ctx);
1763  Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
1764  } else if (HasResult && UseSizedLibcall)
1765  ResultTy = SizedIntTy;
1766  else
1767  ResultTy = Type::getVoidTy(Ctx);
1768 
1769  // Done with setting up arguments and return types, create the call:
1770  SmallVector<Type *, 6> ArgTys;
1771  for (Value *Arg : Args)
1772  ArgTys.push_back(Arg->getType());
1773  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1774  FunctionCallee LibcallFn =
1775  M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1776  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1777  Call->setAttributes(Attr);
1778  Value *Result = Call;
1779 
1780  // And then, extract the results...
1781  if (ValueOperand && !UseSizedLibcall)
1782  Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
1783 
1784  if (CASExpected) {
1785  // The final result from the CAS is {load of 'expected' alloca, bool result
1786  // from call}
1787  Type *FinalResultTy = I->getType();
1788  Value *V = UndefValue::get(FinalResultTy);
1789  Value *ExpectedOut = Builder.CreateAlignedLoad(
1790  CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
1791  Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
1792  V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1793  V = Builder.CreateInsertValue(V, Result, 1);
1794  I->replaceAllUsesWith(V);
1795  } else if (HasResult) {
1796  Value *V;
1797  if (UseSizedLibcall)
1798  V = Builder.CreateBitOrPointerCast(Result, I->getType());
1799  else {
1800  V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
1801  AllocaAlignment);
1802  Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
1803  }
1804  I->replaceAllUsesWith(V);
1805  }
1806  I->eraseFromParent();
1807  return true;
1808 }
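// Hypothetical end-to-end example (a sketch, assuming the target reports only
// 8-byte alignment for i128, so canUseSizedAtomicCall returns false and the
// generic form is used): for
//   %res = cmpxchg i128* %p, i128 %expected, i128 %desired seq_cst seq_cst
// the code above emits roughly
//   %exp.addr = alloca i128            ; in the entry block, via AllocaBuilder
//   %des.addr = alloca i128
//   store i128 %expected, i128* %exp.addr
//   store i128 %desired,  i128* %des.addr
//   %ok = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* %p.i8,
//            i8* %exp.i8, i8* %des.i8, i32 5, i32 5)
//   %out = load i128, i128* %exp.addr
// and then rebuilds the original { i128, i1 } result from %out and %ok.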