//===- AtomicExpandPass.cpp - Expand atomic instructions -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target-specific instructions which implement
// the same semantics in a way which better fits the target backend. This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//
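//
// For example, on a target that requests CmpXChg expansion (a representative
// sketch, not tied to any particular target), an RMW such as
//
//   %old = atomicrmw add i32* %addr, i32 1 seq_cst
//
// becomes a loop around "cmpxchg i32* %addr, i32 %loaded, i32 %new seq_cst";
// see insertRMWCmpXchgLoop below for the exact shape produced.
//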

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "atomic-expand"

namespace {

  class AtomicExpand: public FunctionPass {
    const TargetLowering *TLI = nullptr;

  public:
    static char ID; // Pass identification, replacement for typeid

    AtomicExpand() : FunctionPass(ID) {
      initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

  private:
    bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
    IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
    LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
    bool tryExpandAtomicLoad(LoadInst *LI);
    bool expandAtomicLoadToLL(LoadInst *LI);
    bool expandAtomicLoadToCmpXchg(LoadInst *LI);
    StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
    bool expandAtomicStore(StoreInst *SI);
    bool tryExpandAtomicRMW(AtomicRMWInst *AI);
    Value *
    insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
                      AtomicOrdering MemOpOrder,
                      function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
    void expandAtomicOpToLLSC(
        Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
        function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
    void expandPartwordAtomicRMW(
        AtomicRMWInst *I,
        TargetLoweringBase::AtomicExpansionKind ExpansionKind);
    void expandPartwordCmpXchg(AtomicCmpXchgInst *I);

    AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
    static Value *insertRMWCmpXchgLoop(
        IRBuilder<> &Builder, Type *ResultType, Value *Addr,
        AtomicOrdering MemOpOrder,
        function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
        CreateCmpXchgInstFun CreateCmpXchg);

    bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
    bool isIdempotentRMW(AtomicRMWInst *AI);
    bool simplifyIdempotentRMW(AtomicRMWInst *AI);

    bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
                                 Value *PointerOperand, Value *ValueOperand,
                                 Value *CASExpected, AtomicOrdering Ordering,
                                 AtomicOrdering Ordering2,
                                 ArrayRef<RTLIB::Libcall> Libcalls);
    void expandAtomicLoadToLibcall(LoadInst *LI);
    void expandAtomicStoreToLibcall(StoreInst *SI);
    void expandAtomicRMWToLibcall(AtomicRMWInst *I);
    void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);

    friend bool
    llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                   CreateCmpXchgInstFun CreateCmpXchg);
  };

} // end anonymous namespace

char AtomicExpand::ID = 0;

char &llvm::AtomicExpandID = AtomicExpand::ID;

INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
                false, false)

FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }

// Helper functions to retrieve the size of atomic instructions.
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
  const DataLayout &DL = RMWI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
  const DataLayout &DL = CASI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}

// Helper functions to retrieve the alignment of atomic instructions.
static unsigned getAtomicOpAlign(LoadInst *LI) {
  unsigned Align = LI->getAlignment();
  // In the future, if this IR restriction is relaxed, we should
  // return DataLayout::getABITypeAlignment when there's no align
  // value.
  assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
  return Align;
}

static unsigned getAtomicOpAlign(StoreInst *SI) {
  unsigned Align = SI->getAlignment();
  // In the future, if this IR restriction is relaxed, we should
  // return DataLayout::getABITypeAlignment when there's no align
  // value.
  assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
  return Align;
}

static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
  // TODO(PR27168): This instruction has no alignment attribute, but unlike the
  // default alignment for load/store, the default here is to assume
  // it has NATURAL alignment, not DataLayout-specified alignment.
  const DataLayout &DL = RMWI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
  // TODO(PR27168): same comment as above.
  const DataLayout &DL = CASI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}

// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering. (Versus turning into a __atomic libcall)
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);
  return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
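
// For example, with getMaxAtomicSizeInBitsSupported() == 64 (a hypothetical
// configuration), a 16-byte cmpxchg fails this check and is turned into a
// __atomic_compare_exchange libcall below, as is any access whose alignment
// is smaller than its size.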

bool AtomicExpand::runOnFunction(Function &F) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  auto &TM = TPC->getTM<TargetMachine>();
  if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
    return false;
  TLI = TM.getSubtargetImpl(F)->getTargetLowering();

  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather
  // a list of all atomic instructions before we start.
  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
    Instruction *I = &*II;
    if (I->isAtomic() && !isa<FenceInst>(I))
      AtomicInsts.push_back(I);
  }

  bool MadeChange = false;
  for (auto I : AtomicInsts) {
    auto LI = dyn_cast<LoadInst>(I);
    auto SI = dyn_cast<StoreInst>(I);
    auto RMWI = dyn_cast<AtomicRMWInst>(I);
    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");

    // If the Size/Alignment is not supported, replace with a libcall.
    if (LI) {
      if (!atomicSizeSupported(TLI, LI)) {
        expandAtomicLoadToLibcall(LI);
        MadeChange = true;
        continue;
      }
    } else if (SI) {
      if (!atomicSizeSupported(TLI, SI)) {
        expandAtomicStoreToLibcall(SI);
        MadeChange = true;
        continue;
      }
    } else if (RMWI) {
      if (!atomicSizeSupported(TLI, RMWI)) {
        expandAtomicRMWToLibcall(RMWI);
        MadeChange = true;
        continue;
      }
    } else if (CASI) {
      if (!atomicSizeSupported(TLI, CASI)) {
        expandAtomicCASToLibcall(CASI);
        MadeChange = true;
        continue;
      }
    }

    if (TLI->shouldInsertFencesForAtomic(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (LI && isAcquireOrStronger(LI->getOrdering())) {
        FenceOrdering = LI->getOrdering();
        LI->setOrdering(AtomicOrdering::Monotonic);
      } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
        FenceOrdering = SI->getOrdering();
        SI->setOrdering(AtomicOrdering::Monotonic);
      } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                          isAcquireOrStronger(RMWI->getOrdering()))) {
        FenceOrdering = RMWI->getOrdering();
        RMWI->setOrdering(AtomicOrdering::Monotonic);
      } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
                 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getSuccessOrdering()))) {
        // If a compare and swap is lowered to LL/SC, we can do smarter fence
        // insertion, with a stronger one on the success path than on the
        // failure path. As a result, fence insertion is directly done by
        // expandAtomicCmpXchg in that case.
        FenceOrdering = CASI->getSuccessOrdering();
        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
      }

      if (FenceOrdering != AtomicOrdering::Monotonic) {
        MadeChange |= bracketInstWithFences(I, FenceOrdering);
      }
    }

    if (LI) {
      if (LI->getType()->isFloatingPointTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        LI = convertAtomicLoadToIntegerType(LI);
        assert(LI->getType()->isIntegerTy() && "invariant broken");
        MadeChange = true;
      }

      MadeChange |= tryExpandAtomicLoad(LI);
    } else if (SI) {
      if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        SI = convertAtomicStoreToIntegerType(SI);
        assert(SI->getValueOperand()->getType()->isIntegerTy() &&
               "invariant broken");
        MadeChange = true;
      }

      if (TLI->shouldExpandAtomicStoreInIR(SI))
        MadeChange |= expandAtomicStore(SI);
    } else if (RMWI) {
      // There are two different ways of expanding RMW instructions:
      // - into a load if it is idempotent
      // - into a Cmpxchg/LL-SC loop otherwise
      // we try them in that order.

      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
        MadeChange = true;
      } else {
        MadeChange |= tryExpandAtomicRMW(RMWI);
      }
    } else if (CASI) {
      // TODO: when we're ready to make the change at the IR level, we can
      // extend convertCmpXchgToInteger for floating point too.
      assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
             "unimplemented - floating point not legal at IR level");
      if (CASI->getCompareOperand()->getType()->isPointerTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        CASI = convertCmpXchgToIntegerType(CASI);
        assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
               "invariant broken");
        MadeChange = true;
      }

      unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
      unsigned ValueSize = getAtomicOpSize(CASI);
      if (ValueSize < MinCASSize) {
        assert(!TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
               "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
        expandPartwordCmpXchg(CASI);
      } else {
        if (TLI->shouldExpandAtomicCmpXchgInIR(CASI))
          MadeChange |= expandAtomicCmpXchg(CASI);
      }
    }
  }
  return MadeChange;
}

bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
  IRBuilder<> Builder(I);

  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);

  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence)
    TrailingFence->moveAfter(I);

  return (LeadingFence || TrailingFence);
}
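
// With the default emitLeading/TrailingFence hooks, this turns e.g. (a
// sketch, not specific to any one target):
//
//   store atomic i32 %v, i32* %p release, align 4
//
// (whose ordering runOnFunction has already relaxed to monotonic) into:
//
//   fence release
//   store atomic i32 %v, i32* %p monotonic, align 4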

/// Get the iX type with the same bitwidth as T.
IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
                                                       const DataLayout &DL) {
  EVT VT = TLI->getValueType(DL, T);
  unsigned BitWidth = VT.getStoreSizeInBits();
  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
  return IntegerType::get(T->getContext(), BitWidth);
}

/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth. See the function comment on
/// convertAtomicStoreToIntegerType for background.
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
  auto *M = LI->getModule();
  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
                                            M->getDataLayout());

  IRBuilder<> Builder(LI);

  Value *Addr = LI->getPointerOperand();
  Type *PT = PointerType::get(NewTy,
                              Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  auto *NewLI = Builder.CreateLoad(NewAddr);
  NewLI->setAlignment(LI->getAlignment());
  NewLI->setVolatile(LI->isVolatile());
  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
  DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");

  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(NewVal);
  LI->eraseFromParent();
  return NewLI;
}
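
// For example (a sketch):
//
//   %v = load atomic float, float* %p seq_cst, align 4
//
// becomes
//
//   %1 = bitcast float* %p to i32*
//   %2 = load atomic i32, i32* %1 seq_cst, align 4
//   %v = bitcast i32 %2 to float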

bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    expandAtomicOpToLLSC(
        LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
        [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
    return true;
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicLoadToCmpXchg(LI);
  }
  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}

bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
  IRBuilder<> Builder(LI);

  // On some architectures, load-linked instructions are atomic for larger
  // sizes than normal loads. For example, the only 64-bit load guaranteed
  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
  Value *Val =
      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  IRBuilder<> Builder(LI);
  AtomicOrdering Order = LI->getOrdering();
  Value *Addr = LI->getPointerOperand();
  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();

  return true;
}
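
// For example (a sketch):
//
//   %v = load atomic i64, i64* %p seq_cst, align 8
//
// becomes (with DummyVal == 0, so a "successful" exchange stores back the
// value that was already there):
//
//   %pair = cmpxchg i64* %p, i64 0, i64 0 seq_cst seq_cst
//   %v = extractvalue { i64, i1 } %pair, 0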
440 
441 /// Convert an atomic store of a non-integral type to an integer store of the
442 /// equivalent bitwidth. We used to not support floating point or vector
443 /// atomics in the IR at all. The backends learned to deal with the bitcast
444 /// idiom because that was the only way of expressing the notion of a atomic
445 /// float or vector store. The long term plan is to teach each backend to
446 /// instruction select from the original atomic store, but as a migration
447 /// mechanism, we convert back to the old format which the backends understand.
448 /// Each backend will need individual work to recognize the new format.
449 StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
450  IRBuilder<> Builder(SI);
451  auto *M = SI->getModule();
452  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
453  M->getDataLayout());
454  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
455 
456  Value *Addr = SI->getPointerOperand();
457  Type *PT = PointerType::get(NewTy,
458  Addr->getType()->getPointerAddressSpace());
459  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
460 
461  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
462  NewSI->setAlignment(SI->getAlignment());
463  NewSI->setVolatile(SI->isVolatile());
464  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
465  DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
466  SI->eraseFromParent();
467  return NewSI;
468 }

bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store. So we replace them by an
  // atomic swap that can be implemented for example as a ldrex/strex on ARM
  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
  // It is the responsibility of the target to only signal expansion via
  // shouldExpandAtomicRMW in cases where this is required and possible.
  IRBuilder<> Builder(SI);
  AtomicRMWInst *AI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
                              SI->getValueOperand(), SI->getOrdering());
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  return tryExpandAtomicRMW(AI);
}
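
// For example (a sketch, assuming a 32-bit target where i64 stores are not
// natively single-copy atomic):
//
//   store atomic i64 %v, i64* %p seq_cst, align 8
//
// becomes "atomicrmw xchg i64* %p, i64 %v seq_cst" whose result is unused,
// which tryExpandAtomicRMW then lowers as usual.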

static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal,
                                 AtomicOrdering MemOpOrder,
                                 Value *&Success, Value *&NewLoaded) {
  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
}

/// Emit IR to implement the given atomicrmw operation on values in registers,
/// returning the new value.
static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
                              Value *Loaded, Value *Inc) {
  Value *NewVal;
  switch (Op) {
  case AtomicRMWInst::Xchg:
    return Inc;
  case AtomicRMWInst::Add:
    return Builder.CreateAdd(Loaded, Inc, "new");
  case AtomicRMWInst::Sub:
    return Builder.CreateSub(Loaded, Inc, "new");
  case AtomicRMWInst::And:
    return Builder.CreateAnd(Loaded, Inc, "new");
  case AtomicRMWInst::Nand:
    return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
  case AtomicRMWInst::Or:
    return Builder.CreateOr(Loaded, Inc, "new");
  case AtomicRMWInst::Xor:
    return Builder.CreateXor(Loaded, Inc, "new");
  case AtomicRMWInst::Max:
    NewVal = Builder.CreateICmpSGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::Min:
    NewVal = Builder.CreateICmpSLE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMax:
    NewVal = Builder.CreateICmpUGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMin:
    NewVal = Builder.CreateICmpULE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      llvm_unreachable(
          "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
    } else {
      auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
        return performAtomicOp(AI->getOperation(), Builder, Loaded,
                               AI->getValOperand());
      };
      expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
                           AI->getOrdering(), PerformOp);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
    } else {
      expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
    }
    return true;
  }
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
  }
}

namespace {

/// Result values from createMaskInstrs helper.
struct PartwordMaskValues {
  Type *WordType;
  Type *ValueType;
  Value *AlignedAddr;
  Value *ShiftAmt;
  Value *Mask;
  Value *Inv_Mask;
};

} // end anonymous namespace

/// This is a helper function which builds instructions to provide
/// values necessary for partword atomic operations. It takes an
/// incoming address, Addr, and ValueType, and constructs the address,
/// shift-amounts and masks needed to work with a larger value of size
/// WordSize.
///
/// AlignedAddr: Addr rounded down to a multiple of WordSize
///
/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
///           from AlignedAddr for it to have the same value as if
///           ValueType was loaded from Addr.
///
/// Mask: Value to mask with the value loaded from AlignedAddr to
///       include only the part that would've been loaded from Addr.
///
/// Inv_Mask: The inverse of Mask.
static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
                                           Type *ValueType, Value *Addr,
                                           unsigned WordSize) {
  PartwordMaskValues Ret;

  BasicBlock *BB = I->getParent();
  Function *F = BB->getParent();
  Module *M = I->getModule();

  LLVMContext &Ctx = F->getContext();
  const DataLayout &DL = M->getDataLayout();

  unsigned ValueSize = DL.getTypeStoreSize(ValueType);

  assert(ValueSize < WordSize);

  Ret.ValueType = ValueType;
  Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);

  Type *WordPtrType =
      Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());

  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
  Ret.AlignedAddr = Builder.CreateIntToPtr(
      Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
      "AlignedAddr");

  Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
  if (DL.isLittleEndian()) {
    // turn bytes into bits
    Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  } else {
    // turn bytes into bits, and count from the other side.
    Ret.ShiftAmt =
        Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
  }

  Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
  Ret.Mask = Builder.CreateShl(
      ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
      "Mask");
  Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");

  return Ret;
}
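
// Worked example (a sketch): on a little-endian target with WordSize == 4,
// ValueType == i8, and AddrInt & 3 == 2:
//
//   AlignedAddr = AddrInt & ~3  (as an i32*)
//   ShiftAmt    = 2 * 8 = 16
//   Mask        = 0x00FF0000
//   Inv_Mask    = 0xFF00FFFF
//
// On a big-endian target, ShiftAmt would instead be (2 xor 3) * 8 == 8.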

/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilder<> &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    // Or/Xor won't affect any other bits, so can just be done
    // directly.
    return performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin: {
    // Finally, comparison ops will operate on the full value, so
    // truncate down to the original size, and expand out again after
    // doing the operation.
    Value *Loaded_Shiftdown = Builder.CreateTrunc(
        Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
    Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
    Value *NewVal_Shiftup = Builder.CreateShl(
        Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop must operate only upon a
/// part of the value.
void AtomicExpand::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);

  AtomicOrdering MemOpOrder = AI->getOrdering();

  IRBuilder<> Builder(AI);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
    return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
                                 ValOperand_Shifted, AI->getValOperand(), PMV);
  };

  // TODO: When we're ready to support LLSC conversions too, use
  // insertRMWLLSCLoop here for ExpansionKind==LLSC.
  Value *OldResult =
      insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
                           PerformPartwordOp, createCmpXchgInstFun);
  Value *FinalOldResult = Builder.CreateTrunc(
      Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //        label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //     %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //     %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  IRBuilder<> Builder(CI);
  LLVMContext &Ctx = Builder.getContext();

  const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
      CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, check whether the masked-out part of the loaded value has
  // been modified. If it has, another thread raced us, so retry the cmpxchg;
  // if it hasn't, the masked-in part must have mismatched and the cmpxchg
  // has genuinely failed.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  Value *FinalOldVal = Builder.CreateTrunc(
      Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}

void AtomicExpand::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
  IRBuilder<> Builder(I);
  Value *Loaded =
      insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}

Value *AtomicExpand::insertRMWLLSCLoop(
    IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp ne i32 %stored, 0
  //     br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}

/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  IRBuilder<> Builder(CI);

  Value *Addr = CI->getPointerOperand();
  Type *PT = PointerType::get(NewTy,
                              Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);

  auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
                                            CI->getSuccessOrdering(),
                                            CI->getFailureOrdering(),
                                            CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);

  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());

  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}

bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
  // should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder =
      ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;

  // In implementations which use a barrier to achieve release semantics, we
  // can delay emitting this barrier until we know a store is actually going to
  // be attempted. The cost of this delay is that we need 2 copies of the block
  // emitting the load-linked, affecting code size.
  //
  // Ideally, this logic would be unconditional except for the minsize check
  // since in other cases the extra blocks naturally collapse down to the
  // minimal loop. Unfortunately, this puts too much stress on later
  // optimisations so we avoid emitting the extra logic in those cases too.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->optForMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  // cmpxchg.start:
  //     %unreleasedload = @load.linked(%addr)
  //     %should_store = icmp eq %unreleasedload, %desired
  //     br i1 %should_store, label %cmpxchg.fencedstore,
  //                          label %cmpxchg.nostore
  // cmpxchg.fencedstore:
  //     fence?
  //     br label %cmpxchg.trystore
  // cmpxchg.trystore:
  //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.fencedstore],
  //                            [%releasedload, %cmpxchg.releasedload]
  //     %stored = @store_conditional(%new, %addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.releasedload/%cmpxchg.failure
  // cmpxchg.releasedload:
  //     %releasedload = @load.linked(%addr)
  //     %should_store = icmp eq %releasedload, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.nostore:
  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                           [%releasedload,
  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded = phi [%loaded.nostore, %cmpxchg.failure],
  //                   [%loaded.trystore, %cmpxchg.trystore]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  // This grabs the DebugLoc from CI
  IRBuilder<> Builder(CI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoad, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  Value *StoreSuccess = TLI->emitStoreConditional(
      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
    ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
                                       "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // Setup the builder so we can create any PHIs we need.
  Value *Loaded;
  if (!HasReleasedLoadBB)
    Loaded = UnreleasedLoad;
  else {
    Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
    PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
    TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);

    Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
    PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
    NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);

    Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
    PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
    ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);

    Loaded = ExitLoaded;
  }

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto User : CI->users()) {
    auto EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}

bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  AtomicRMWInst::BinOp Op = RMWI->getOperation();
  switch (Op) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();
  case AtomicRMWInst::And:
    return C->isMinusOne();
  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
  default:
    return false;
  }
}

bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}
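
// For example, "atomicrmw or i32* %p, i32 0 seq_cst" writes back exactly the
// value it reads, so a target may lower it to a fenced
// "load atomic i32, i32* %p seq_cst" instead; the resulting load is then fed
// back through tryExpandAtomicLoad.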

Value *AtomicExpand::insertRMWCmpXchgLoop(
    IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
  // Atomics require at least natural alignment.
  InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                Success, NewLoaded);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}

// Note: This function is exposed externally by AtomicExpandUtils.h
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  IRBuilder<> Builder(AI);
  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
      [&](IRBuilder<> &Builder, Value *Loaded) {
        return performAtomicOp(AI->getOperation(), Builder, Loaded,
                               AI->getValOperand());
      },
      CreateCmpXchg);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}

// In order to use one of the sized library calls such as
// __atomic_fetch_add_4, the alignment must be sufficient, the size
// must be one of the potentially-specialized sizes, and the value
// type must actually exist in C on the target (otherwise, the
// function wouldn't actually be defined.)
static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
                                  const DataLayout &DL) {
  // TODO: "LargestSize" is an approximation for "largest type that
  // you can express in C". It seems to be the case that int128 is
  // supported on all 64-bit platforms, otherwise only up to 64-bit
  // integers are supported. If we get this wrong, then we'll try to
  // call a sized libcall that doesn't actually exist. There should
  // really be some more reliable way in LLVM of determining integer
  // sizes which are valid in the target's C ABI...
  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
  return Align >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}
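
// For example, Size == 4 with Align >= 4 permits __atomic_fetch_add_4, while
// an under-aligned 4-byte access does not; in that case an RMW falls back to
// the cmpxchg-loop-plus-__atomic_compare_exchange path in
// expandAtomicRMWToLibcall below.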

void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  (void)expanded;
  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
}

void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  (void)expanded;
  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
}

void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  (void)expanded;
  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
}

static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::BAD_BINOP:
    llvm_unreachable("Should not have BAD_BINOP.");
  case AtomicRMWInst::Xchg:
    return makeArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return makeArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return makeArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return makeArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return makeArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return makeArrayRef(LibcallsXor);
  case AtomicRMWInst::Nand:
    return makeArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
    // No atomic libcalls are available for max/min/umax/umin.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}
1415 
1416 void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1418 
1419  unsigned Size = getAtomicOpSize(I);
1420  unsigned Align = getAtomicOpAlign(I);
1421 
1422  bool Success = false;
1423  if (!Libcalls.empty())
1424  Success = expandAtomicOpToLibcall(
1425  I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
1426  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1427 
1428  // The expansion failed: either there were no libcalls at all for
1429  // the operation (min/max), or there were only size-specialized
1430  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1431  // CAS libcall, via a CAS loop, instead.
1432  if (!Success) {
1433  expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
1434  Value *Loaded, Value *NewVal,
1435  AtomicOrdering MemOpOrder,
1436  Value *&Success, Value *&NewLoaded) {
1437  // Create the CAS instruction normally...
1438  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1439  Addr, Loaded, NewVal, MemOpOrder,
1440  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
1441  Success = Builder.CreateExtractValue(Pair, 1, "success");
1442  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1443 
1444  // ...and then expand the CAS into a libcall.
1445  expandAtomicCASToLibcall(Pair);
1446  });
1447  }
1448 }
1449 
1450 // A helper routine for the above expandAtomic*ToLibcall functions.
1451 //
1452 // 'Libcalls' contains an array of enum values for the particular
1453 // ATOMIC libcalls to be emitted. All of the other arguments besides
1454 // 'I' are extracted from the Instruction subclass by the
1455 // caller. Depending on the particular call, some will be null.
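// For instance, at the call sites above: a cmpxchg supplies its compare
// operand as 'CASExpected' together with its success/failure orderings,
// while an RMW supplies a null 'CASExpected' and AtomicOrdering::NotAtomic
// as 'Ordering2'.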
1456 bool AtomicExpand::expandAtomicOpToLibcall(
1457  Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
1458  Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1459  AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1460  assert(Libcalls.size() == 6);
1461 
1462  LLVMContext &Ctx = I->getContext();
1463  Module *M = I->getModule();
1464  const DataLayout &DL = M->getDataLayout();
1465  IRBuilder<> Builder(I);
1466  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1467 
1468  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
1469  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1470 
1471  unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);
1472 
1473  // TODO: the "order" argument type is "int", not int32. So
1474  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1475  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1476  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1477  Constant *OrderingVal =
1478  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1479  Constant *Ordering2Val = nullptr;
1480  if (CASExpected) {
1481  assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1482  Ordering2Val =
1483  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1484  }
1485  bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1486 
1487  RTLIB::Libcall RTLibType;
1488  if (UseSizedLibcall) {
1489  switch (Size) {
1490  case 1: RTLibType = Libcalls[1]; break;
1491  case 2: RTLibType = Libcalls[2]; break;
1492  case 4: RTLibType = Libcalls[3]; break;
1493  case 8: RTLibType = Libcalls[4]; break;
1494  case 16: RTLibType = Libcalls[5]; break;
1495  }
1496  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1497  RTLibType = Libcalls[0];
1498  } else {
1499  // Can't use sized function, and there's no generic for this
1500  // operation, so give up.
1501  return false;
1502  }
1503 
1504  // Build up the function call. There are two kinds. First, the sized
1505  // variants. These calls are going to be one of the following (with
1506  // N=1,2,4,8,16):
1507  // iN __atomic_load_N(iN *ptr, int ordering)
1508  // void __atomic_store_N(iN *ptr, iN val, int ordering)
1509  // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1510  // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1511  // int success_order, int failure_order)
1512  //
1513  // Note that these functions can be used for non-integer atomic
1514  // operations; the values just need to be bitcast to integers on the
1515  // way in and out.
1516  //
1517  // And, then, the generic variants. They look like the following:
1518  // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1519  // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1520  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1521  // int ordering)
1522  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1523  // void *desired, int success_order,
1524  // int failure_order)
1525  //
1526  // The different signatures are built up depending on the
1527  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1528  // variables.
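  // As a sketch of the sized path: a naturally-aligned 4-byte operation
  //   %old = atomicrmw add i32* %p, i32 %v seq_cst
  // becomes
  //   %old = call i32 @__atomic_fetch_add_4(i8* %p8, i32 %v, i32 5)
  // where %p8 is the pointer bitcast to i8* and 5 is the C ABI encoding
  // of seq_cst. An over-sized or under-aligned operation instead takes
  // the generic path, passing 'val' and 'ret' indirectly through the
  // allocas created below.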
1529 
1530  AllocaInst *AllocaCASExpected = nullptr;
1531  Value *AllocaCASExpected_i8 = nullptr;
1532  AllocaInst *AllocaValue = nullptr;
1533  Value *AllocaValue_i8 = nullptr;
1534  AllocaInst *AllocaResult = nullptr;
1535  Value *AllocaResult_i8 = nullptr;
1536 
1537  Type *ResultTy;
1538  SmallVector<Value *, 6> Args;
1539  AttributeList Attr;
1540 
1541  // 'size' argument.
1542  if (!UseSizedLibcall) {
1543  // Note, getIntPtrType is assumed equivalent to size_t.
1544  Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1545  }
1546 
1547  // 'ptr' argument.
1548  Value *PtrVal =
1549  Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
1550  Args.push_back(PtrVal);
1551 
1552  // 'expected' argument, if present.
1553  if (CASExpected) {
1554  AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1555  AllocaCASExpected->setAlignment(AllocaAlignment);
1556  AllocaCASExpected_i8 =
1557  Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
1558  Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
1559  Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1560  Args.push_back(AllocaCASExpected_i8);
1561  }
1562 
1563  // 'val' argument ('desired' for cas), if present.
1564  if (ValueOperand) {
1565  if (UseSizedLibcall) {
1566  Value *IntValue =
1567  Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1568  Args.push_back(IntValue);
1569  } else {
1570  AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1571  AllocaValue->setAlignment(AllocaAlignment);
1572  AllocaValue_i8 =
1573  Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
1574  Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
1575  Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1576  Args.push_back(AllocaValue_i8);
1577  }
1578  }
1579 
1580  // 'ret' argument.
1581  if (!CASExpected && HasResult && !UseSizedLibcall) {
1582  AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1583  AllocaResult->setAlignment(AllocaAlignment);
1584  AllocaResult_i8 =
1585  Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
1586  Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
1587  Args.push_back(AllocaResult_i8);
1588  }
1589 
1590  // 'ordering' ('success_order' for cas) argument.
1591  Args.push_back(OrderingVal);
1592 
1593  // 'failure_order' argument, if present.
1594  if (Ordering2Val)
1595  Args.push_back(Ordering2Val);
1596 
1597  // Now, the return type.
1598  if (CASExpected) {
1599  ResultTy = Type::getInt1Ty(Ctx);
1600  Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
1601  } else if (HasResult && UseSizedLibcall)
1602  ResultTy = SizedIntTy;
1603  else
1604  ResultTy = Type::getVoidTy(Ctx);
1605 
1606  // Done with setting up arguments and return types, create the call:
1607  SmallVector<Type *, 6> ArgTys;
1608  for (Value *Arg : Args)
1609  ArgTys.push_back(Arg->getType());
1610  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1611  Constant *LibcallFn =
1612  M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1613  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1614  Call->setAttributes(Attr);
1615  Value *Result = Call;
1616 
1617  // And then, extract the results...
1618  if (ValueOperand && !UseSizedLibcall)
1619  Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
1620 
1621  if (CASExpected) {
1622  // The final result from the CAS is {load of 'expected' alloca, bool result
1623  // from call}
1624  Type *FinalResultTy = I->getType();
1625  Value *V = UndefValue::get(FinalResultTy);
1626  Value *ExpectedOut =
1627  Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment);
1628  Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
1629  V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1630  V = Builder.CreateInsertValue(V, Result, 1);
1631  I->replaceAllUsesWith(V);
1632  } else if (HasResult) {
1633  Value *V;
1634  if (UseSizedLibcall)
1635  V = Builder.CreateBitOrPointerCast(Result, I->getType());
1636  else {
1637  V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment);
1638  Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
1639  }
1640  I->replaceAllUsesWith(V);
1641  }
1642  I->eraseFromParent();
1643  return true;
1644 }