AtomicExpandPass.cpp
1 //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass (at IR level) to replace atomic instructions with
10 // __atomic_* library calls, or target-specific instructions which implement the
11 // same semantics in a way which better fits the target backend. This can
12 // include the use of (intrinsic-based) load-linked/store-conditional loops,
13 // AtomicCmpXchg, or type coercions.
14 //
15 //===----------------------------------------------------------------------===//
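// As a rough illustration (the exact output depends on which AtomicExpansionKind
// the target requests), an RMW such as
//   %old = atomicrmw add i32* %ptr, i32 1 seq_cst
// may be rewritten into a compare-and-swap retry loop built around
//   %pair = cmpxchg i32* %ptr, i32 %loaded, i32 %new seq_cst seq_cst
// (see insertRMWCmpXchgLoop below), or into a load-linked/store-conditional
// loop on targets that prefer LL/SC (see insertRMWLLSCLoop).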
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
21 #include "llvm/CodeGen/AtomicExpandUtils.h"
22 #include "llvm/CodeGen/RuntimeLibcalls.h"
23 #include "llvm/CodeGen/TargetLowering.h"
24 #include "llvm/CodeGen/TargetPassConfig.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/CodeGen/ValueTypes.h"
27 #include "llvm/IR/Attributes.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/Constant.h"
30 #include "llvm/IR/Constants.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/DerivedTypes.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/IRBuilder.h"
35 #include "llvm/IR/InstIterator.h"
36 #include "llvm/IR/Instruction.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/Type.h"
40 #include "llvm/IR/User.h"
41 #include "llvm/IR/Value.h"
42 #include "llvm/InitializePasses.h"
43 #include "llvm/Pass.h"
44 #include "llvm/Support/AtomicOrdering.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/raw_ostream.h"
49 #include "llvm/Target/TargetMachine.h"
50 #include <cassert>
51 #include <cstdint>
52 #include <iterator>
53 
54 using namespace llvm;
55 
56 #define DEBUG_TYPE "atomic-expand"
57 
58 namespace {
59 
60  class AtomicExpand: public FunctionPass {
61  const TargetLowering *TLI = nullptr;
62 
63  public:
64  static char ID; // Pass identification, replacement for typeid
65 
66  AtomicExpand() : FunctionPass(ID) {
67  initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
68  }
69 
70  bool runOnFunction(Function &F) override;
71 
72  private:
73  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
74  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
75  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
76  bool tryExpandAtomicLoad(LoadInst *LI);
77  bool expandAtomicLoadToLL(LoadInst *LI);
78  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
79  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
80  bool expandAtomicStore(StoreInst *SI);
81  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
82  AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
83  Value *
84  insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
85  Align AddrAlign, AtomicOrdering MemOpOrder,
86  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
87  void expandAtomicOpToLLSC(
88  Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
89  AtomicOrdering MemOpOrder,
90  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
91  void expandPartwordAtomicRMW(
92  AtomicRMWInst *I,
93  TargetLoweringBase::AtomicExpansionKind ExpansionKind);
94  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
95  bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
96  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
97  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
98 
99  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
100  static Value *insertRMWCmpXchgLoop(
101  IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
102  AtomicOrdering MemOpOrder, SyncScope::ID SSID,
103  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
104  CreateCmpXchgInstFun CreateCmpXchg);
105  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
106 
107  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
108  bool isIdempotentRMW(AtomicRMWInst *RMWI);
109  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
110 
111  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
112  Value *PointerOperand, Value *ValueOperand,
113  Value *CASExpected, AtomicOrdering Ordering,
114  AtomicOrdering Ordering2,
115  ArrayRef<RTLIB::Libcall> Libcalls);
116  void expandAtomicLoadToLibcall(LoadInst *LI);
117  void expandAtomicStoreToLibcall(StoreInst *LI);
118  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
119  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
120 
121  friend bool
122  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
123  CreateCmpXchgInstFun CreateCmpXchg);
124  };
125 
126 } // end anonymous namespace
127 
128 char AtomicExpand::ID = 0;
129 
130 char &llvm::AtomicExpandID = AtomicExpand::ID;
131 
132 INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
133  false, false)
134 
135 FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
136 
137 // Helper functions to retrieve the size of atomic instructions.
138 static unsigned getAtomicOpSize(LoadInst *LI) {
139  const DataLayout &DL = LI->getModule()->getDataLayout();
140  return DL.getTypeStoreSize(LI->getType());
141 }
142 
143 static unsigned getAtomicOpSize(StoreInst *SI) {
144  const DataLayout &DL = SI->getModule()->getDataLayout();
145  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
146 }
147 
148 static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
149  const DataLayout &DL = RMWI->getModule()->getDataLayout();
150  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
151 }
152 
153 static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
154  const DataLayout &DL = CASI->getModule()->getDataLayout();
155  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
156 }
157 
158 // Determine if a particular atomic operation has a supported size,
159 // and is of appropriate alignment, to be passed through for target
160 // lowering. (Versus turning into a __atomic libcall)
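// For example (numbers are illustrative): on a target whose maximum supported
// atomic width is 64 bits, an 8-byte load with 8-byte alignment is passed
// through, while a 16-byte or under-aligned operation is routed to the
// __atomic_* libcall path below.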
161 template <typename Inst>
162 static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
163  unsigned Size = getAtomicOpSize(I);
164  Align Alignment = I->getAlign();
165  return Alignment >= Size &&
166  Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
167 }
168 
169 bool AtomicExpand::runOnFunction(Function &F) {
170  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
171  if (!TPC)
172  return false;
173 
174  auto &TM = TPC->getTM<TargetMachine>();
175  if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
176  return false;
177  TLI = TM.getSubtargetImpl(F)->getTargetLowering();
178 
179  SmallVector<Instruction *, 1> AtomicInsts;
180 
181  // Changing control-flow while iterating through it is a bad idea, so gather a
182  // list of all atomic instructions before we start.
183  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
184  Instruction *I = &*II;
185  if (I->isAtomic() && !isa<FenceInst>(I))
186  AtomicInsts.push_back(I);
187  }
188 
189  bool MadeChange = false;
190  for (auto I : AtomicInsts) {
191  auto LI = dyn_cast<LoadInst>(I);
192  auto SI = dyn_cast<StoreInst>(I);
193  auto RMWI = dyn_cast<AtomicRMWInst>(I);
194  auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
195  assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
196 
197  // If the Size/Alignment is not supported, replace with a libcall.
198  if (LI) {
199  if (!atomicSizeSupported(TLI, LI)) {
200  expandAtomicLoadToLibcall(LI);
201  MadeChange = true;
202  continue;
203  }
204  } else if (SI) {
205  if (!atomicSizeSupported(TLI, SI)) {
206  expandAtomicStoreToLibcall(SI);
207  MadeChange = true;
208  continue;
209  }
210  } else if (RMWI) {
211  if (!atomicSizeSupported(TLI, RMWI)) {
212  expandAtomicRMWToLibcall(RMWI);
213  MadeChange = true;
214  continue;
215  }
216  } else if (CASI) {
217  if (!atomicSizeSupported(TLI, CASI)) {
218  expandAtomicCASToLibcall(CASI);
219  MadeChange = true;
220  continue;
221  }
222  }
223 
224  if (TLI->shouldInsertFencesForAtomic(I)) {
225  auto FenceOrdering = AtomicOrdering::Monotonic;
226  if (LI && isAcquireOrStronger(LI->getOrdering())) {
227  FenceOrdering = LI->getOrdering();
228  LI->setOrdering(AtomicOrdering::Monotonic);
229  } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
230  FenceOrdering = SI->getOrdering();
231  SI->setOrdering(AtomicOrdering::Monotonic);
232  } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
233  isAcquireOrStronger(RMWI->getOrdering()))) {
234  FenceOrdering = RMWI->getOrdering();
235  RMWI->setOrdering(AtomicOrdering::Monotonic);
236  } else if (CASI &&
237  TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
238  TargetLoweringBase::AtomicExpansionKind::None &&
239  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
240  isAcquireOrStronger(CASI->getSuccessOrdering()) ||
241  isAcquireOrStronger(CASI->getFailureOrdering()))) {
242  // If a compare and swap is lowered to LL/SC, we can do smarter fence
243  // insertion, with a stronger one on the success path than on the
244  // failure path. As a result, fence insertion is directly done by
245  // expandAtomicCmpXchg in that case.
246  FenceOrdering = CASI->getMergedOrdering();
247  CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
248  CASI->setFailureOrdering(AtomicOrdering::Monotonic);
249  }
250 
251  if (FenceOrdering != AtomicOrdering::Monotonic) {
252  MadeChange |= bracketInstWithFences(I, FenceOrdering);
253  }
254  }
255 
256  if (LI) {
257  if (LI->getType()->isFloatingPointTy()) {
258  // TODO: add a TLI hook to control this so that each target can
259  // convert to lowering the original type one at a time.
260  LI = convertAtomicLoadToIntegerType(LI);
261  assert(LI->getType()->isIntegerTy() && "invariant broken");
262  MadeChange = true;
263  }
264 
265  MadeChange |= tryExpandAtomicLoad(LI);
266  } else if (SI) {
267  if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
268  // TODO: add a TLI hook to control this so that each target can
269  // convert to lowering the original type one at a time.
270  SI = convertAtomicStoreToIntegerType(SI);
271  assert(SI->getValueOperand()->getType()->isIntegerTy() &&
272  "invariant broken");
273  MadeChange = true;
274  }
275 
276  if (TLI->shouldExpandAtomicStoreInIR(SI))
277  MadeChange |= expandAtomicStore(SI);
278  } else if (RMWI) {
279  // There are two different ways of expanding RMW instructions:
280  // - into a load if it is idempotent
281  // - into a Cmpxchg/LL-SC loop otherwise
282  // we try them in that order.
283 
284  if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
285  MadeChange = true;
286  } else {
287  AtomicRMWInst::BinOp Op = RMWI->getOperation();
288  if (Op == AtomicRMWInst::Xchg &&
289  RMWI->getValOperand()->getType()->isFloatingPointTy()) {
290  // TODO: add a TLI hook to control this so that each target can
291  // convert to lowering the original type one at a time.
292  RMWI = convertAtomicXchgToIntegerType(RMWI);
293  assert(RMWI->getValOperand()->getType()->isIntegerTy() &&
294  "invariant broken");
295  MadeChange = true;
296  }
297  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
298  unsigned ValueSize = getAtomicOpSize(RMWI);
299  if (ValueSize < MinCASSize &&
300  (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
301  Op == AtomicRMWInst::And)) {
302  RMWI = widenPartwordAtomicRMW(RMWI);
303  MadeChange = true;
304  }
305 
306  MadeChange |= tryExpandAtomicRMW(RMWI);
307  }
308  } else if (CASI) {
309  // TODO: when we're ready to make the change at the IR level, we can
310  // extend convertCmpXchgToInteger for floating point too.
311  assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
312  "unimplemented - floating point not legal at IR level");
313  if (CASI->getCompareOperand()->getType()->isPointerTy() ) {
314  // TODO: add a TLI hook to control this so that each target can
315  // convert to lowering the original type one at a time.
316  CASI = convertCmpXchgToIntegerType(CASI);
317  assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
318  "invariant broken");
319  MadeChange = true;
320  }
321 
322  MadeChange |= tryExpandAtomicCmpXchg(CASI);
323  }
324  }
325  return MadeChange;
326 }
327 
328 bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
329  IRBuilder<> Builder(I);
330 
331  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
332 
333  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
334  // We have a guard here because not every atomic operation generates a
335  // trailing fence.
336  if (TrailingFence)
337  TrailingFence->moveAfter(I);
338 
339  return (LeadingFence || TrailingFence);
340 }
341 
342 /// Get the iX type with the same bitwidth as T.
343 IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
344  const DataLayout &DL) {
345  EVT VT = TLI->getMemValueType(DL, T);
346  unsigned BitWidth = VT.getStoreSizeInBits();
347  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
348  return IntegerType::get(T->getContext(), BitWidth);
349 }
350 
351 /// Convert an atomic load of a non-integral type to an integer load of the
352 /// equivalent bitwidth. See the function comment on
353 /// convertAtomicStoreToIntegerType for background.
354 LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
355  auto *M = LI->getModule();
356  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
357  M->getDataLayout());
358 
359  IRBuilder<> Builder(LI);
360 
361  Value *Addr = LI->getPointerOperand();
362  Type *PT = PointerType::get(NewTy,
363  Addr->getType()->getPointerAddressSpace());
364  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
365 
366  auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
367  NewLI->setAlignment(LI->getAlign());
368  NewLI->setVolatile(LI->isVolatile());
369  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
370  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
371 
372  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
373  LI->replaceAllUsesWith(NewVal);
374  LI->eraseFromParent();
375  return NewLI;
376 }
377 
378 AtomicRMWInst *
379 AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
380  auto *M = RMWI->getModule();
381  Type *NewTy =
382  getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
383 
384  IRBuilder<> Builder(RMWI);
385 
386  Value *Addr = RMWI->getPointerOperand();
387  Value *Val = RMWI->getValOperand();
388  Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
389  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
390  Value *NewVal = Builder.CreateBitCast(Val, NewTy);
391 
392  auto *NewRMWI =
393  Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
394  RMWI->getAlign(), RMWI->getOrdering());
395  NewRMWI->setVolatile(RMWI->isVolatile());
396  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
397 
398  Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType());
399  RMWI->replaceAllUsesWith(NewRVal);
400  RMWI->eraseFromParent();
401  return NewRMWI;
402 }
403 
404 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
405  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
406  case TargetLoweringBase::AtomicExpansionKind::None:
407  return false;
408  case TargetLoweringBase::AtomicExpansionKind::LLSC:
409  expandAtomicOpToLLSC(
410  LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
411  LI->getOrdering(),
412  [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
413  return true;
414  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
415  return expandAtomicLoadToLL(LI);
416  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
417  return expandAtomicLoadToCmpXchg(LI);
418  default:
419  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
420  }
421 }
422 
423 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
424  IRBuilder<> Builder(LI);
425 
426  // On some architectures, load-linked instructions are atomic for larger
427  // sizes than normal loads. For example, the only 64-bit load guaranteed
428  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
429  Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
430  LI->getPointerOperand(), LI->getOrdering());
431  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
432 
433  LI->replaceAllUsesWith(Val);
434  LI->eraseFromParent();
435 
436  return true;
437 }
438 
439 bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
440  IRBuilder<> Builder(LI);
441  AtomicOrdering Order = LI->getOrdering();
442  if (Order == AtomicOrdering::Unordered)
443  Order = AtomicOrdering::Monotonic;
444 
445  Value *Addr = LI->getPointerOperand();
446  Type *Ty = LI->getType();
447  Constant *DummyVal = Constant::getNullValue(Ty);
448 
449  Value *Pair = Builder.CreateAtomicCmpXchg(
450  Addr, DummyVal, DummyVal, LI->getAlign(), Order,
451  AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
452  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
453 
454  LI->replaceAllUsesWith(Loaded);
455  LI->eraseFromParent();
456 
457  return true;
458 }
459 
460 /// Convert an atomic store of a non-integral type to an integer store of the
461 /// equivalent bitwidth. We used to not support floating point or vector
462 /// atomics in the IR at all. The backends learned to deal with the bitcast
463 // idiom because that was the only way of expressing the notion of an atomic
464 /// float or vector store. The long term plan is to teach each backend to
465 /// instruction select from the original atomic store, but as a migration
466 /// mechanism, we convert back to the old format which the backends understand.
467 /// Each backend will need individual work to recognize the new format.
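/// For example (illustrative), "store atomic float %f, float* %p seq_cst, align 4"
/// becomes a bitcast of %f to i32 and of %p to i32*, followed by
/// "store atomic i32 %f.cast, i32* %p.cast seq_cst, align 4".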
468 StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
469  IRBuilder<> Builder(SI);
470  auto *M = SI->getModule();
471  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
472  M->getDataLayout());
473  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
474 
475  Value *Addr = SI->getPointerOperand();
476  Type *PT = PointerType::get(NewTy,
477  Addr->getType()->getPointerAddressSpace());
478  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
479 
480  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
481  NewSI->setAlignment(SI->getAlign());
482  NewSI->setVolatile(SI->isVolatile());
483  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
484  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
485  SI->eraseFromParent();
486  return NewSI;
487 }
488 
489 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
490  // This function is only called on atomic stores that are too large to be
491  // atomic if implemented as a native store. So we replace them by an
492  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
493  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
494  // It is the responsibility of the target to only signal expansion via
495  // shouldExpandAtomicRMW in cases where this is required and possible.
496  IRBuilder<> Builder(SI);
497  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
498  AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
499  SI->getAlign(), SI->getOrdering());
500  SI->eraseFromParent();
501 
502  // Now we have an appropriate swap instruction, lower it as usual.
503  return tryExpandAtomicRMW(AI);
504 }
505 
506 static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
507  Value *Loaded, Value *NewVal, Align AddrAlign,
508  AtomicOrdering MemOpOrder, SyncScope::ID SSID,
509  Value *&Success, Value *&NewLoaded) {
510  Type *OrigTy = NewVal->getType();
511 
512  // This code can go away when cmpxchg supports FP types.
513  bool NeedBitcast = OrigTy->isFloatingPointTy();
514  if (NeedBitcast) {
515  IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
516  unsigned AS = Addr->getType()->getPointerAddressSpace();
517  Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
518  NewVal = Builder.CreateBitCast(NewVal, IntTy);
519  Loaded = Builder.CreateBitCast(Loaded, IntTy);
520  }
521 
522  Value *Pair = Builder.CreateAtomicCmpXchg(
523  Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
524  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
525  Success = Builder.CreateExtractValue(Pair, 1, "success");
526  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
527 
528  if (NeedBitcast)
529  NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
530 }
531 
532 /// Emit IR to implement the given atomicrmw operation on values in registers,
533 /// returning the new value.
534 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
535  Value *Loaded, Value *Inc) {
536  Value *NewVal;
537  switch (Op) {
538  case AtomicRMWInst::Xchg:
539  return Inc;
540  case AtomicRMWInst::Add:
541  return Builder.CreateAdd(Loaded, Inc, "new");
542  case AtomicRMWInst::Sub:
543  return Builder.CreateSub(Loaded, Inc, "new");
544  case AtomicRMWInst::And:
545  return Builder.CreateAnd(Loaded, Inc, "new");
546  case AtomicRMWInst::Nand:
547  return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
548  case AtomicRMWInst::Or:
549  return Builder.CreateOr(Loaded, Inc, "new");
550  case AtomicRMWInst::Xor:
551  return Builder.CreateXor(Loaded, Inc, "new");
552  case AtomicRMWInst::Max:
553  NewVal = Builder.CreateICmpSGT(Loaded, Inc);
554  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
555  case AtomicRMWInst::Min:
556  NewVal = Builder.CreateICmpSLE(Loaded, Inc);
557  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
558  case AtomicRMWInst::UMax:
559  NewVal = Builder.CreateICmpUGT(Loaded, Inc);
560  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
561  case AtomicRMWInst::UMin:
562  NewVal = Builder.CreateICmpULE(Loaded, Inc);
563  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
564  case AtomicRMWInst::FAdd:
565  return Builder.CreateFAdd(Loaded, Inc, "new");
566  case AtomicRMWInst::FSub:
567  return Builder.CreateFSub(Loaded, Inc, "new");
568  default:
569  llvm_unreachable("Unknown atomic op");
570  }
571 }
572 
573 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
574  LLVMContext &Ctx = AI->getModule()->getContext();
575  TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
576  switch (Kind) {
577  case TargetLoweringBase::AtomicExpansionKind::None:
578  return false;
579  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
580  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
581  unsigned ValueSize = getAtomicOpSize(AI);
582  if (ValueSize < MinCASSize) {
583  expandPartwordAtomicRMW(AI,
584  TargetLoweringBase::AtomicExpansionKind::LLSC);
585  } else {
586  auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
587  return performAtomicOp(AI->getOperation(), Builder, Loaded,
588  AI->getValOperand());
589  };
590  expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
591  AI->getAlign(), AI->getOrdering(), PerformOp);
592  }
593  return true;
594  }
595  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
596  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
597  unsigned ValueSize = getAtomicOpSize(AI);
598  if (ValueSize < MinCASSize) {
599  // TODO: Handle atomicrmw fadd/fsub
600  if (AI->getType()->isFloatingPointTy())
601  return false;
602 
603  expandPartwordAtomicRMW(AI,
604  TargetLoweringBase::AtomicExpansionKind::CmpXChg);
605  } else {
606  SmallVector<StringRef> SSNs;
607  Ctx.getSyncScopeNames(SSNs);
608  auto MemScope = SSNs[AI->getSyncScopeID()].empty()
609  ? "system"
610  : SSNs[AI->getSyncScopeID()];
611  OptimizationRemarkEmitter ORE(AI->getFunction());
612  ORE.emit([&]() {
613  return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
614  << "A compare and swap loop was generated for an atomic "
615  << AI->getOperationName(AI->getOperation()) << " operation at "
616  << MemScope << " memory scope";
617  });
618  expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
619  }
620  return true;
621  }
622  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
623  expandAtomicRMWToMaskedIntrinsic(AI);
624  return true;
625  }
626  default:
627  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
628  }
629 }
630 
631 namespace {
632 
633 struct PartwordMaskValues {
634  // These three fields are guaranteed to be set by createMaskInstrs.
635  Type *WordType = nullptr;
636  Type *ValueType = nullptr;
637  Value *AlignedAddr = nullptr;
638  Align AlignedAddrAlignment;
639  // The remaining fields can be null.
640  Value *ShiftAmt = nullptr;
641  Value *Mask = nullptr;
642  Value *Inv_Mask = nullptr;
643 };
644 
645 LLVM_ATTRIBUTE_UNUSED
646 raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
647  auto PrintObj = [&O](auto *V) {
648  if (V)
649  O << *V;
650  else
651  O << "nullptr";
652  O << '\n';
653  };
654  O << "PartwordMaskValues {\n";
655  O << " WordType: ";
656  PrintObj(PMV.WordType);
657  O << " ValueType: ";
658  PrintObj(PMV.ValueType);
659  O << " AlignedAddr: ";
660  PrintObj(PMV.AlignedAddr);
661  O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
662  O << " ShiftAmt: ";
663  PrintObj(PMV.ShiftAmt);
664  O << " Mask: ";
665  PrintObj(PMV.Mask);
666  O << " Inv_Mask: ";
667  PrintObj(PMV.Inv_Mask);
668  O << "}\n";
669  return O;
670 }
671 
672 } // end anonymous namespace
673 
674 /// This is a helper function which builds instructions to provide
675 /// values necessary for partword atomic operations. It takes an
676 /// incoming address, Addr, and ValueType, and constructs the address,
677 /// shift-amounts and masks needed to work with a larger value of size
678 /// WordSize.
679 ///
680 /// AlignedAddr: Addr rounded down to a multiple of WordSize
681 ///
682 /// ShiftAmt: Number of bits to right-shift a WordSize value loaded
683 /// from AlignAddr for it to have the same value as if
684 /// ValueType was loaded from Addr.
685 ///
686 /// Mask: Value to mask with the value loaded from AlignAddr to
687 /// include only the part that would've been loaded from Addr.
688 ///
689 /// Inv_Mask: The inverse of Mask.
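/// Worked example (assuming MinWordSize == 4 and a little-endian target): for
/// an i8 at address A, AlignedAddr == A & ~3, ShiftAmt == (A & 3) * 8,
/// Mask == 0xff << ShiftAmt, and Inv_Mask == ~Mask.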
690 static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
691  Type *ValueType, Value *Addr,
692  Align AddrAlign,
693  unsigned MinWordSize) {
694  PartwordMaskValues PMV;
695 
696  Module *M = I->getModule();
697  LLVMContext &Ctx = M->getContext();
698  const DataLayout &DL = M->getDataLayout();
699  unsigned ValueSize = DL.getTypeStoreSize(ValueType);
700 
701  PMV.ValueType = ValueType;
702  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
703  : ValueType;
704  if (PMV.ValueType == PMV.WordType) {
705  PMV.AlignedAddr = Addr;
706  PMV.AlignedAddrAlignment = AddrAlign;
707  PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
708  PMV.Mask = ConstantInt::get(PMV.ValueType, ~0);
709  return PMV;
710  }
711 
712  assert(ValueSize < MinWordSize);
713 
714  Type *WordPtrType =
715  PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
716 
717  // TODO: we could skip some of this if AddrAlign >= MinWordSize.
718  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
719  PMV.AlignedAddr = Builder.CreateIntToPtr(
720  Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType,
721  "AlignedAddr");
722  PMV.AlignedAddrAlignment = Align(MinWordSize);
723 
724  Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
725  if (DL.isLittleEndian()) {
726  // turn bytes into bits
727  PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
728  } else {
729  // turn bytes into bits, and count from the other side.
730  PMV.ShiftAmt = Builder.CreateShl(
731  Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
732  }
733 
734  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
735  PMV.Mask = Builder.CreateShl(
736  ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
737  "Mask");
738  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
739  return PMV;
740 }
741 
742 static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord,
743  const PartwordMaskValues &PMV) {
744  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
745  if (PMV.WordType == PMV.ValueType)
746  return WideWord;
747 
748  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
749  Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted");
750  return Trunc;
751 }
752 
753 static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord,
754  Value *Updated, const PartwordMaskValues &PMV) {
755  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
756  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
757  if (PMV.WordType == PMV.ValueType)
758  return Updated;
759 
760  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
761  Value *Shift =
762  Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
763  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
764  Value *Or = Builder.CreateOr(And, Shift, "inserted");
765  return Or;
766 }
767 
768 /// Emit IR to implement a masked version of a given atomicrmw
769 /// operation. (That is, only the bits under the Mask should be
770 /// affected by the operation)
771 static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
772  IRBuilder<> &Builder, Value *Loaded,
773  Value *Shifted_Inc, Value *Inc,
774  const PartwordMaskValues &PMV) {
775  // TODO: update to use
776  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
777  // to merge bits from two values without requiring PMV.Inv_Mask.
778  switch (Op) {
779  case AtomicRMWInst::Xchg: {
780  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
781  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
782  return FinalVal;
783  }
784  case AtomicRMWInst::Or:
785  case AtomicRMWInst::Xor:
786  case AtomicRMWInst::And:
787  llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
788  case AtomicRMWInst::Add:
789  case AtomicRMWInst::Sub:
790  case AtomicRMWInst::Nand: {
791  // The other arithmetic ops need to be masked into place.
792  Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
793  Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
794  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
795  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
796  return FinalVal;
797  }
798  case AtomicRMWInst::Max:
799  case AtomicRMWInst::Min:
800  case AtomicRMWInst::UMax:
801  case AtomicRMWInst::UMin: {
802  // Finally, comparison ops will operate on the full value, so
803  // truncate down to the original size, and expand out again after
804  // doing the operation.
805  Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
806  Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc);
807  Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
808  return FinalVal;
809  }
810  default:
811  llvm_unreachable("Unknown atomic op");
812  }
813 }
814 
815 /// Expand a sub-word atomicrmw operation into an appropriate
816 /// word-sized operation.
817 ///
818 /// It will create an LL/SC or cmpxchg loop, as appropriate, the same
819 /// way as a typical atomicrmw expansion. The only difference here is
820 /// that the operation inside of the loop may operate upon only a
821 /// part of the value.
822 void AtomicExpand::expandPartwordAtomicRMW(
823  AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
824  AtomicOrdering MemOpOrder = AI->getOrdering();
825  SyncScope::ID SSID = AI->getSyncScopeID();
826 
827  IRBuilder<> Builder(AI);
828 
829  PartwordMaskValues PMV =
830  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
831  AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
832 
833  Value *ValOperand_Shifted =
834  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
835  PMV.ShiftAmt, "ValOperand_Shifted");
836 
837  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
838  return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
839  ValOperand_Shifted, AI->getValOperand(), PMV);
840  };
841 
842  Value *OldResult;
843  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
844  OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
845  PMV.AlignedAddrAlignment, MemOpOrder,
846  SSID, PerformPartwordOp,
847  createCmpXchgInstFun);
848  } else {
849  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
850  OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
851  PMV.AlignedAddrAlignment, MemOpOrder,
852  PerformPartwordOp);
853  }
854 
855  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
856  AI->replaceAllUsesWith(FinalOldResult);
857  AI->eraseFromParent();
858 }
859 
860 // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
861 AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
862  IRBuilder<> Builder(AI);
863  AtomicRMWInst::BinOp Op = AI->getOperation();
864 
865  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
866  Op == AtomicRMWInst::And) &&
867  "Unable to widen operation");
868 
869  PartwordMaskValues PMV =
870  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
871  AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
872 
873  Value *ValOperand_Shifted =
874  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
875  PMV.ShiftAmt, "ValOperand_Shifted");
876 
877  Value *NewOperand;
878 
879  if (Op == AtomicRMWInst::And)
880  NewOperand =
881  Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
882  else
883  NewOperand = ValOperand_Shifted;
884 
885  AtomicRMWInst *NewAI =
886  Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand,
887  PMV.AlignedAddrAlignment, AI->getOrdering());
888 
889  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
890  AI->replaceAllUsesWith(FinalOldResult);
891  AI->eraseFromParent();
892  return NewAI;
893 }
894 
895 bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
896  // The basic idea here is that we're expanding a cmpxchg of a
897  // smaller memory size up to a word-sized cmpxchg. To do this, we
898  // need to add a retry-loop for strong cmpxchg, so that
899  // modifications to other parts of the word don't cause a spurious
900  // failure.
901 
902  // This generates code like the following:
903  // [[Setup mask values PMV.*]]
904  // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
905  // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
906  // %InitLoaded = load i32* %addr
907  // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
908  // br partword.cmpxchg.loop
909  // partword.cmpxchg.loop:
910  // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
911  // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
912  // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
913  // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
914  // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
915  // i32 %FullWord_NewVal success_ordering failure_ordering
916  // %OldVal = extractvalue { i32, i1 } %NewCI, 0
917  // %Success = extractvalue { i32, i1 } %NewCI, 1
918  // br i1 %Success, label %partword.cmpxchg.end,
919  // label %partword.cmpxchg.failure
920  // partword.cmpxchg.failure:
921  // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
922  // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
923  // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
924  // label %partword.cmpxchg.end
925  // partword.cmpxchg.end:
926  // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
927  // %FinalOldVal = trunc i32 %tmp1 to i8
928  // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
929  // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
930 
931  Value *Addr = CI->getPointerOperand();
932  Value *Cmp = CI->getCompareOperand();
933  Value *NewVal = CI->getNewValOperand();
934 
935  BasicBlock *BB = CI->getParent();
936  Function *F = BB->getParent();
937  IRBuilder<> Builder(CI);
938  LLVMContext &Ctx = Builder.getContext();
939 
940  BasicBlock *EndBB =
941  BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
942  auto FailureBB =
943  BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
944  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
945 
946  // The split call above "helpfully" added a branch at the end of BB
947  // (to the wrong place).
948  std::prev(BB->end())->eraseFromParent();
949  Builder.SetInsertPoint(BB);
950 
951  PartwordMaskValues PMV =
952  createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
953  CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
954 
955  // Shift the incoming values over, into the right location in the word.
956  Value *NewVal_Shifted =
957  Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
958  Value *Cmp_Shifted =
959  Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
960 
961  // Load the entire current word, and mask into place the expected and new
962  // values
963  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
964  InitLoaded->setVolatile(CI->isVolatile());
965  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
966  Builder.CreateBr(LoopBB);
967 
968  // partword.cmpxchg.loop:
969  Builder.SetInsertPoint(LoopBB);
970  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
971  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
972 
973  // Mask/Or the expected and new values into place in the loaded word.
974  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
975  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
976  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
977  PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
978  CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
979  NewCI->setVolatile(CI->isVolatile());
980  // When we're building a strong cmpxchg, we need a loop, so you
981  // might think we could use a weak cmpxchg inside. But, using strong
982  // allows the below comparison for ShouldContinue, and we're
983  // expecting the underlying cmpxchg to be a machine instruction,
984  // which is strong anyways.
985  NewCI->setWeak(CI->isWeak());
986 
987  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
988  Value *Success = Builder.CreateExtractValue(NewCI, 1);
989 
990  if (CI->isWeak())
991  Builder.CreateBr(EndBB);
992  else
993  Builder.CreateCondBr(Success, EndBB, FailureBB);
994 
995  // partword.cmpxchg.failure:
996  Builder.SetInsertPoint(FailureBB);
997  // Upon failure, check whether the masked-out part of the loaded value
998  // has been modified. If it hasn't, abort the cmpxchg, since the
999  // masked-in part must have been.
1000  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1001  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1002  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1003 
1004  // Add the second value to the phi from above
1005  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1006 
1007  // partword.cmpxchg.end:
1008  Builder.SetInsertPoint(CI);
1009 
1010  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1011  Value *Res = UndefValue::get(CI->getType());
1012  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1013  Res = Builder.CreateInsertValue(Res, Success, 1);
1014 
1015  CI->replaceAllUsesWith(Res);
1016  CI->eraseFromParent();
1017  return true;
1018 }
1019 
1020 void AtomicExpand::expandAtomicOpToLLSC(
1021  Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1022  AtomicOrdering MemOpOrder,
1023  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
1024  IRBuilder<> Builder(I);
1025  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1026  MemOpOrder, PerformOp);
1027 
1028  I->replaceAllUsesWith(Loaded);
1029  I->eraseFromParent();
1030 }
1031 
1032 void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1033  IRBuilder<> Builder(AI);
1034 
1035  PartwordMaskValues PMV =
1036  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1037  AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1038 
1039  // The value operand must be sign-extended for signed min/max so that the
1040  // target's signed comparison instructions can be used. Otherwise, just
1041  // zero-ext.
1042  Instruction::CastOps CastOp = Instruction::ZExt;
1043  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1044  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1045  CastOp = Instruction::SExt;
1046 
1047  Value *ValOperand_Shifted = Builder.CreateShl(
1048  Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1049  PMV.ShiftAmt, "ValOperand_Shifted");
1050  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1051  Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1052  AI->getOrdering());
1053  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1054  AI->replaceAllUsesWith(FinalOldResult);
1055  AI->eraseFromParent();
1056 }
1057 
1058 void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
1059  IRBuilder<> Builder(CI);
1060 
1061  PartwordMaskValues PMV = createMaskInstrs(
1062  Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1063  CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1064 
1065  Value *CmpVal_Shifted = Builder.CreateShl(
1066  Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1067  "CmpVal_Shifted");
1068  Value *NewVal_Shifted = Builder.CreateShl(
1069  Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1070  "NewVal_Shifted");
1071  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1072  Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1073  CI->getMergedOrdering());
1074  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1075  Value *Res = UndefValue::get(CI->getType());
1076  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1077  Value *Success = Builder.CreateICmpEQ(
1078  CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1079  Res = Builder.CreateInsertValue(Res, Success, 1);
1080 
1081  CI->replaceAllUsesWith(Res);
1082  CI->eraseFromParent();
1083 }
1084 
1085 Value *AtomicExpand::insertRMWLLSCLoop(
1086  IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1087  AtomicOrdering MemOpOrder,
1088  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
1089  LLVMContext &Ctx = Builder.getContext();
1090  BasicBlock *BB = Builder.GetInsertBlock();
1091  Function *F = BB->getParent();
1092 
1093  assert(AddrAlign >=
1094  F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1095  "Expected at least natural alignment at this point.");
1096 
1097  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1098  //
1099  // The standard expansion we produce is:
1100  // [...]
1101  // atomicrmw.start:
1102  // %loaded = @load.linked(%addr)
1103  // %new = some_op iN %loaded, %incr
1104  // %stored = @store_conditional(%new, %addr)
1105  // %try_again = icmp i32 ne %stored, 0
1106  // br i1 %try_again, label %loop, label %atomicrmw.end
1107  // atomicrmw.end:
1108  // [...]
1109  BasicBlock *ExitBB =
1110  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1111  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1112 
1113  // The split call above "helpfully" added a branch at the end of BB (to the
1114  // wrong place).
1115  std::prev(BB->end())->eraseFromParent();
1116  Builder.SetInsertPoint(BB);
1117  Builder.CreateBr(LoopBB);
1118 
1119  // Start the main loop block now that we've taken care of the preliminaries.
1120  Builder.SetInsertPoint(LoopBB);
1121  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1122 
1123  Value *NewVal = PerformOp(Builder, Loaded);
1124 
1125  Value *StoreSuccess =
1126  TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1127  Value *TryAgain = Builder.CreateICmpNE(
1128  StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1129  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1130 
1131  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1132  return Loaded;
1133 }
1134 
1135 /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1136 /// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1137 /// IR. As a migration step, we convert back to what used to be the standard
1138 /// way to represent a pointer cmpxchg so that we can update backends one by
1139 /// one.
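/// For example (assuming 64-bit pointers), "cmpxchg i8** %p, i8* %old, i8* %new
/// seq_cst seq_cst" becomes a cmpxchg over i64 values obtained with ptrtoint,
/// with the loaded result converted back via inttoptr.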
1140 AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1141  auto *M = CI->getModule();
1142  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1143  M->getDataLayout());
1144 
1145  IRBuilder<> Builder(CI);
1146 
1147  Value *Addr = CI->getPointerOperand();
1148  Type *PT = PointerType::get(NewTy,
1149  Addr->getType()->getPointerAddressSpace());
1150  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
1151 
1152  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1153  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1154 
1155  auto *NewCI = Builder.CreateAtomicCmpXchg(
1156  NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1157  CI->getFailureOrdering(), CI->getSyncScopeID());
1158  NewCI->setVolatile(CI->isVolatile());
1159  NewCI->setWeak(CI->isWeak());
1160  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1161 
1162  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1163  Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1164 
1165  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1166 
1167  Value *Res = UndefValue::get(CI->getType());
1168  Res = Builder.CreateInsertValue(Res, OldVal, 0);
1169  Res = Builder.CreateInsertValue(Res, Succ, 1);
1170 
1171  CI->replaceAllUsesWith(Res);
1172  CI->eraseFromParent();
1173  return NewCI;
1174 }
1175 
1176 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1177  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1178  AtomicOrdering FailureOrder = CI->getFailureOrdering();
1179  Value *Addr = CI->getPointerOperand();
1180  BasicBlock *BB = CI->getParent();
1181  Function *F = BB->getParent();
1182  LLVMContext &Ctx = F->getContext();
1183  // If shouldInsertFencesForAtomic() returns true, then the target does not
1184  // want to deal with memory orders, and emitLeading/TrailingFence should take
1185  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1186  // should preserve the ordering.
1187  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1188  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1189  ? AtomicOrdering::Monotonic
1190  : CI->getMergedOrdering();
1191 
1192  // In implementations which use a barrier to achieve release semantics, we can
1193  // delay emitting this barrier until we know a store is actually going to be
1194  // attempted. The cost of this delay is that we need 2 copies of the block
1195  // emitting the load-linked, affecting code size.
1196  //
1197  // Ideally, this logic would be unconditional except for the minsize check
1198  // since in other cases the extra blocks naturally collapse down to the
1199  // minimal loop. Unfortunately, this puts too much stress on later
1200  // optimisations so we avoid emitting the extra logic in those cases too.
1201  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1202  SuccessOrder != AtomicOrdering::Monotonic &&
1203  SuccessOrder != AtomicOrdering::Acquire &&
1204  !F->hasMinSize();
1205 
1206  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1207  // do it even on minsize.
1208  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1209 
1210  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1211  //
1212  // The full expansion we produce is:
1213  // [...]
1214  // %aligned.addr = ...
1215  // cmpxchg.start:
1216  // %unreleasedload = @load.linked(%aligned.addr)
1217  // %unreleasedload.extract = extract value from %unreleasedload
1218  // %should_store = icmp eq %unreleasedload.extract, %desired
1219  // br i1 %should_store, label %cmpxchg.releasingstore,
1220  // label %cmpxchg.nostore
1221  // cmpxchg.releasingstore:
1222  // fence?
1223  // br label cmpxchg.trystore
1224  // cmpxchg.trystore:
1225  // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1226  // [%releasedload, %cmpxchg.releasedload]
1227  // %updated.new = insert %new into %loaded.trystore
1228  // %stored = @store_conditional(%updated.new, %aligned.addr)
1229  // %success = icmp eq i32 %stored, 0
1230  // br i1 %success, label %cmpxchg.success,
1231  // label %cmpxchg.releasedload/%cmpxchg.failure
1232  // cmpxchg.releasedload:
1233  // %releasedload = @load.linked(%aligned.addr)
1234  // %releasedload.extract = extract value from %releasedload
1235  // %should_store = icmp eq %releasedload.extract, %desired
1236  // br i1 %should_store, label %cmpxchg.trystore,
1237  // label %cmpxchg.failure
1238  // cmpxchg.success:
1239  // fence?
1240  // br label %cmpxchg.end
1241  // cmpxchg.nostore:
1242  // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1243  // [%releasedload,
1244  // %cmpxchg.releasedload/%cmpxchg.trystore]
1245  // @load_linked_fail_balance()?
1246  // br label %cmpxchg.failure
1247  // cmpxchg.failure:
1248  // fence?
1249  // br label %cmpxchg.end
1250  // cmpxchg.end:
1251  // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1252  // [%loaded.trystore, %cmpxchg.trystore]
1253  // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1254  // %loaded = extract value from %loaded.exit
1255  // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1256  // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1257  // [...]
1258  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1259  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1260  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1261  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1262  auto ReleasedLoadBB =
1263  BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1264  auto TryStoreBB =
1265  BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1266  auto ReleasingStoreBB =
1267  BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1268  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1269 
1270  // This grabs the DebugLoc from CI
1271  IRBuilder<> Builder(CI);
1272 
1273  // The split call above "helpfully" added a branch at the end of BB (to the
1274  // wrong place), but we might want a fence too. It's easiest to just remove
1275  // the branch entirely.
1276  std::prev(BB->end())->eraseFromParent();
1277  Builder.SetInsertPoint(BB);
1278  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1279  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1280 
1281  PartwordMaskValues PMV =
1282  createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1283  CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1284  Builder.CreateBr(StartBB);
1285 
1286  // Start the main loop block now that we've taken care of the preliminaries.
1287  Builder.SetInsertPoint(StartBB);
1288  Value *UnreleasedLoad =
1289  TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1290  Value *UnreleasedLoadExtract =
1291  extractMaskedValue(Builder, UnreleasedLoad, PMV);
1292  Value *ShouldStore = Builder.CreateICmpEQ(
1293  UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1294 
1295  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1296  // jump straight past that fence instruction (if it exists).
1297  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1298 
1299  Builder.SetInsertPoint(ReleasingStoreBB);
1300  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1301  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1302  Builder.CreateBr(TryStoreBB);
1303 
1304  Builder.SetInsertPoint(TryStoreBB);
1305  PHINode *LoadedTryStore =
1306  Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1307  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1308  Value *NewValueInsert =
1309  insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1310  Value *StoreSuccess =
1311  TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr,
1312  MemOpOrder);
1313  StoreSuccess = Builder.CreateICmpEQ(
1314  StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1315  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1316  Builder.CreateCondBr(StoreSuccess, SuccessBB,
1317  CI->isWeak() ? FailureBB : RetryBB);
1318 
1319  Builder.SetInsertPoint(ReleasedLoadBB);
1320  Value *SecondLoad;
1321  if (HasReleasedLoadBB) {
1322  SecondLoad =
1323  TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1324  Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1325  ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1326  CI->getCompareOperand(), "should_store");
1327 
1328  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1329  // jump straight past that fence instruction (if it exists).
1330  Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1331  // Update PHI node in TryStoreBB.
1332  LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1333  } else
1334  Builder.CreateUnreachable();
1335 
1336  // Make sure later instructions don't get reordered with a fence if
1337  // necessary.
1338  Builder.SetInsertPoint(SuccessBB);
1339  if (ShouldInsertFencesForAtomic)
1340  TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1341  Builder.CreateBr(ExitBB);
1342 
1343  Builder.SetInsertPoint(NoStoreBB);
1344  PHINode *LoadedNoStore =
1345  Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1346  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1347  if (HasReleasedLoadBB)
1348  LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1349 
1350  // In the failing case, where we don't execute the store-conditional, the
1351  // target might want to balance out the load-linked with a dedicated
1352  // instruction (e.g., on ARM, clearing the exclusive monitor).
1353  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1354  Builder.CreateBr(FailureBB);
1355 
1356  Builder.SetInsertPoint(FailureBB);
1357  PHINode *LoadedFailure =
1358  Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1359  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1360  if (CI->isWeak())
1361  LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1362  if (ShouldInsertFencesForAtomic)
1363  TLI->emitTrailingFence(Builder, CI, FailureOrder);
1364  Builder.CreateBr(ExitBB);
1365 
1366  // Finally, we have control-flow based knowledge of whether the cmpxchg
1367  // succeeded or not. We expose this to later passes by converting any
1368  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1369  // PHI.
1370  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1371  PHINode *LoadedExit =
1372  Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1373  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1374  LoadedExit->addIncoming(LoadedFailure, FailureBB);
1375  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1376  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1377  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1378 
1379  // This is the "exit value" from the cmpxchg expansion. It may be of
1380  // a type wider than the one in the cmpxchg instruction.
1381  Value *LoadedFull = LoadedExit;
1382 
1383  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1384  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1385 
1386  // Look for any users of the cmpxchg that are just comparing the loaded value
1387  // against the desired one, and replace them with the CFG-derived version.
1388  SmallVector<ExtractValueInst *, 2> PrunedInsts;
1389  for (auto User : CI->users()) {
1390  ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1391  if (!EV)
1392  continue;
1393 
1394  assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1395  "weird extraction from { iN, i1 }");
1396 
1397  if (EV->getIndices()[0] == 0)
1398  EV->replaceAllUsesWith(Loaded);
1399  else
1400  EV->replaceAllUsesWith(Success);
1401 
1402  PrunedInsts.push_back(EV);
1403  }
1404 
1405  // We can remove the instructions now we're no longer iterating through them.
1406  for (auto EV : PrunedInsts)
1407  EV->eraseFromParent();
1408 
1409  if (!CI->use_empty()) {
1410  // Some use of the full struct return that we don't understand has happened,
1411  // so we've got to reconstruct it properly.
1412  Value *Res;
1413  Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
1414  Res = Builder.CreateInsertValue(Res, Success, 1);
1415 
1416  CI->replaceAllUsesWith(Res);
1417  }
1418 
1419  CI->eraseFromParent();
1420  return true;
1421 }
1422 
1423 bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
1424  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1425  if(!C)
1426  return false;
1427 
1428  AtomicRMWInst::BinOp Op = RMWI->getOperation();
1429  switch(Op) {
1430  case AtomicRMWInst::Add:
1431  case AtomicRMWInst::Sub:
1432  case AtomicRMWInst::Or:
1433  case AtomicRMWInst::Xor:
1434  return C->isZero();
1435  case AtomicRMWInst::And:
1436  return C->isMinusOne();
1437  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1438  default:
1439  return false;
1440  }
1441 }
1442 
1443 bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
1444  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1445  tryExpandAtomicLoad(ResultingLoad);
1446  return true;
1447  }
1448  return false;
1449 }
1450 
1451 Value *AtomicExpand::insertRMWCmpXchgLoop(
1452  IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1453  AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1454  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
1455  CreateCmpXchgInstFun CreateCmpXchg) {
1456  LLVMContext &Ctx = Builder.getContext();
1457  BasicBlock *BB = Builder.GetInsertBlock();
1458  Function *F = BB->getParent();
1459 
1460  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1461  //
1462  // The standard expansion we produce is:
1463  // [...]
1464  // %init_loaded = load atomic iN* %addr
1465  // br label %loop
1466  // loop:
1467  // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1468  // %new = some_op iN %loaded, %incr
1469  // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1470  // %new_loaded = extractvalue { iN, i1 } %pair, 0
1471  // %success = extractvalue { iN, i1 } %pair, 1
1472  // br i1 %success, label %atomicrmw.end, label %loop
1473  // atomicrmw.end:
1474  // [...]
1475  BasicBlock *ExitBB =
1476  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1477  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1478 
1479  // The split call above "helpfully" added a branch at the end of BB (to the
1480  // wrong place), but we want a load. It's easiest to just remove
1481  // the branch entirely.
1482  std::prev(BB->end())->eraseFromParent();
1483  Builder.SetInsertPoint(BB);
1484  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1485  Builder.CreateBr(LoopBB);
1486 
1487  // Start the main loop block now that we've taken care of the preliminaries.
1488  Builder.SetInsertPoint(LoopBB);
1489  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1490  Loaded->addIncoming(InitLoaded, BB);
1491 
1492  Value *NewVal = PerformOp(Builder, Loaded);
1493 
1494  Value *NewLoaded = nullptr;
1495  Value *Success = nullptr;
1496 
1497  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1498  MemOpOrder == AtomicOrdering::Unordered
1499  ? AtomicOrdering::Monotonic
1500  : MemOpOrder,
1501  SSID, Success, NewLoaded);
1502  assert(Success && NewLoaded);
1503 
1504  Loaded->addIncoming(NewLoaded, LoopBB);
1505 
1506  Builder.CreateCondBr(Success, ExitBB, LoopBB);
1507 
1508  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1509  return NewLoaded;
1510 }
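// For example, given "atomicrmw add i32* %p, i32 1 seq_cst", PerformOp adds
// 1 to %loaded and CreateCmpXchg emits the cmpxchg shown in the sketch above
// with the same seq_cst ordering; the only ordering adjustment made here is
// that an unordered memory order is promoted to monotonic, since cmpxchg
// does not accept unordered.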
1511 
1512 bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1513  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1514  unsigned ValueSize = getAtomicOpSize(CI);
1515 
1516  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1517  default:
1518  llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1519  case TargetLoweringBase::AtomicExpansionKind::None:
1520  if (ValueSize < MinCASSize)
1521  return expandPartwordCmpXchg(CI);
1522  return false;
1523  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1524  return expandAtomicCmpXchg(CI);
1525  }
1526  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1527  expandAtomicCmpXchgToMaskedIntrinsic(CI);
1528  return true;
1529  }
1530 }
1531 
1532 // Note: This function is exposed externally by AtomicExpandUtils.h
1533 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1534  CreateCmpXchgInstFun CreateCmpXchg) {
1535  IRBuilder<> Builder(AI);
1536  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
1537  Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1538  AI->getOrdering(), AI->getSyncScopeID(),
1539  [&](IRBuilder<> &Builder, Value *Loaded) {
1540  return performAtomicOp(AI->getOperation(), Builder, Loaded,
1541  AI->getValOperand());
1542  },
1543  CreateCmpXchg);
1544 
1545  AI->replaceAllUsesWith(Loaded);
1546  AI->eraseFromParent();
1547  return true;
1548 }
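// Minimal usage sketch: with the createCmpXchgInstFun helper defined earlier
// in this file, a plain IR-level expansion of an AtomicRMWInst *AI is just
//   llvm::expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
// which is what tryExpandAtomicRMW uses for the CmpXChg expansion kind;
// expandAtomicRMWToLibcall below instead passes a callback that also turns
// the emitted cmpxchg into a __atomic_compare_exchange* libcall.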
1549 
1550 // In order to use one of the sized library calls such as
1551 // __atomic_fetch_add_4, the alignment must be sufficient, the size
1552 // must be one of the potentially-specialized sizes, and the value
1553 // type must actually exist in C on the target (otherwise, the
1554 // function wouldn't actually be defined).
1555 static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1556  const DataLayout &DL) {
1557  // TODO: "LargestSize" is an approximation for "largest type that
1558  // you can express in C". It seems to be the case that int128 is
1559  // supported on all 64-bit platforms, otherwise only up to 64-bit
1560  // integers are supported. If we get this wrong, then we'll try to
1561  // call a sized libcall that doesn't actually exist. There should
1562  // really be some more reliable way in LLVM of determining integer
1563  // sizes which are valid in the target's C ABI...
1564  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1565  return Alignment >= Size &&
1566  (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1567  Size <= LargestSize;
1568 }
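// For illustration, on a typical 64-bit target (largest legal integer type
// of at least 64 bits, so LargestSize == 16):
//   canUseSizedAtomicCall(4, Align(4), DL)   -> true  (e.g. __atomic_load_4)
//   canUseSizedAtomicCall(8, Align(4), DL)   -> false (under-aligned)
//   canUseSizedAtomicCall(3, Align(4), DL)   -> false (no __atomic_*_3)
// while on a target whose largest legal integer type is 32 bits, 16-byte
// operations are rejected here and fall back to the generic __atomic_* calls.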
1569 
1570 void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
1571  static const RTLIB::Libcall Libcalls[6] = {
1572  RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1573  RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1574  unsigned Size = getAtomicOpSize(I);
1575 
1576  bool expanded = expandAtomicOpToLibcall(
1577  I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1578  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1579  if (!expanded)
1580  report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
1581 }
1582 
1583 void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
1584  static const RTLIB::Libcall Libcalls[6] = {
1585  RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1586  RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1587  unsigned Size = getAtomicOpSize(I);
1588 
1589  bool expanded = expandAtomicOpToLibcall(
1590  I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1591  nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1592  if (!expanded)
1593  report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
1594 }
1595 
1596 void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1597  static const RTLIB::Libcall Libcalls[6] = {
1598  RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1599  RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1600  RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1601  unsigned Size = getAtomicOpSize(I);
1602 
1603  bool expanded = expandAtomicOpToLibcall(
1604  I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1605  I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1606  Libcalls);
1607  if (!expanded)
1608  report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
1609 }
1610 
1611 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1612  static const RTLIB::Libcall LibcallsXchg[6] = {
1613  RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1614  RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1615  RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1616  static const RTLIB::Libcall LibcallsAdd[6] = {
1617  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1618  RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1619  RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1620  static const RTLIB::Libcall LibcallsSub[6] = {
1621  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1622  RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1623  RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1624  static const RTLIB::Libcall LibcallsAnd[6] = {
1625  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1626  RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1627  RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1628  static const RTLIB::Libcall LibcallsOr[6] = {
1629  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1630  RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1631  RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1632  static const RTLIB::Libcall LibcallsXor[6] = {
1633  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1634  RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1635  RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1636  static const RTLIB::Libcall LibcallsNand[6] = {
1637  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1638  RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1639  RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1640 
1641  switch (Op) {
1642  case AtomicRMWInst::BAD_BINOP:
1643  llvm_unreachable("Should not have BAD_BINOP.");
1644  case AtomicRMWInst::Xchg:
1645  return makeArrayRef(LibcallsXchg);
1646  case AtomicRMWInst::Add:
1647  return makeArrayRef(LibcallsAdd);
1648  case AtomicRMWInst::Sub:
1649  return makeArrayRef(LibcallsSub);
1650  case AtomicRMWInst::And:
1651  return makeArrayRef(LibcallsAnd);
1652  case AtomicRMWInst::Or:
1653  return makeArrayRef(LibcallsOr);
1654  case AtomicRMWInst::Xor:
1655  return makeArrayRef(LibcallsXor);
1656  case AtomicRMWInst::Nand:
1657  return makeArrayRef(LibcallsNand);
1658  case AtomicRMWInst::Max:
1659  case AtomicRMWInst::Min:
1660  case AtomicRMWInst::UMax:
1661  case AtomicRMWInst::UMin:
1662  case AtomicRMWInst::FAdd:
1663  case AtomicRMWInst::FSub:
1664  // No atomic libcalls are available for these operations (max/min/umax/umin, fadd/fsub).
1665  return {};
1666  }
1667  llvm_unreachable("Unexpected AtomicRMW operation.");
1668 }
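// Note how the tables are used: index 0 is the generic ("any size") libcall
// and indices 1..5 are the 1/2/4/8/16-byte specializations. Only Xchg has a
// generic entry (__atomic_exchange); for Add/Sub/And/Or/Xor/Nand slot 0 is
// UNKNOWN_LIBCALL, so those operations can only be emitted as the sized
// __atomic_fetch_*_N calls, and everything else is routed through the CAS
// loop in expandAtomicRMWToLibcall below.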
1669 
1670 void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1671  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1672 
1673  unsigned Size = getAtomicOpSize(I);
1674 
1675  bool Success = false;
1676  if (!Libcalls.empty())
1677  Success = expandAtomicOpToLibcall(
1678  I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1679  nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1680 
1681  // The expansion failed: either there were no libcalls at all for
1682  // the operation (min/max), or there were only size-specialized
1683  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1684  // CAS libcall, via a CAS loop, instead.
1685  if (!Success) {
1686  expandAtomicRMWToCmpXchg(
1687  I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded,
1688  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1689  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
1690  // Create the CAS instruction normally...
1691  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1692  Addr, Loaded, NewVal, Alignment, MemOpOrder,
1693  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1694  Success = Builder.CreateExtractValue(Pair, 1, "success");
1695  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1696 
1697  // ...and then expand the CAS into a libcall.
1698  expandAtomicCASToLibcall(Pair);
1699  });
1700  }
1701 }
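// Sketch of the fallback path: an operation with no libcall, e.g.
//   %old = atomicrmw max i32* %p, i32 %v seq_cst
// is first rewritten into the compare-exchange loop documented above
// insertRMWCmpXchgLoop, and the cmpxchg inside that loop is then itself
// expanded into a call to __atomic_compare_exchange_4 (or to the generic
// __atomic_compare_exchange when the sized form cannot be used).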
1702 
1703 // A helper routine for the above expandAtomic*ToLibcall functions.
1704 //
1705 // 'Libcalls' contains an array of enum values for the particular
1706 // ATOMIC libcalls to be emitted. All of the other arguments besides
1707 // 'I' are extracted from the Instruction subclass by the
1708 // caller. Depending on the particular call, some will be null.
1709 bool AtomicExpand::expandAtomicOpToLibcall(
1710  Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1711  Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1712  AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1713  assert(Libcalls.size() == 6);
1714 
1715  LLVMContext &Ctx = I->getContext();
1716  Module *M = I->getModule();
1717  const DataLayout &DL = M->getDataLayout();
1718  IRBuilder<> Builder(I);
1719  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1720 
1721  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1722  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1723 
1724  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1725 
1726  // TODO: the "order" argument type is "int", not int32. So
1727  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1728  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1729  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1730  Constant *OrderingVal =
1731  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1732  Constant *Ordering2Val = nullptr;
1733  if (CASExpected) {
1734  assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1735  Ordering2Val =
1736  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1737  }
1738  bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1739 
1740  RTLIB::Libcall RTLibType;
1741  if (UseSizedLibcall) {
1742  switch (Size) {
1743  case 1: RTLibType = Libcalls[1]; break;
1744  case 2: RTLibType = Libcalls[2]; break;
1745  case 4: RTLibType = Libcalls[3]; break;
1746  case 8: RTLibType = Libcalls[4]; break;
1747  case 16: RTLibType = Libcalls[5]; break;
1748  }
1749  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1750  RTLibType = Libcalls[0];
1751  } else {
1752  // Can't use sized function, and there's no generic for this
1753  // operation, so give up.
1754  return false;
1755  }
1756 
1757  if (!TLI->getLibcallName(RTLibType)) {
1758  // This target does not implement the requested atomic libcall so give up.
1759  return false;
1760  }
1761 
1762  // Build up the function call. There's two kinds. First, the sized
1763  // variants. These calls are going to be one of the following (with
1764  // N=1,2,4,8,16):
1765  // iN __atomic_load_N(iN *ptr, int ordering)
1766  // void __atomic_store_N(iN *ptr, iN val, int ordering)
1767  // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1768  // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1769  // int success_order, int failure_order)
1770  //
1771  // Note that these functions can be used for non-integer atomic
1772  // operations, the values just need to be bitcast to integers on the
1773  // way in and out.
1774  //
1775  // And, then, the generic variants. They look like the following:
1776  // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1777  // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1778  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1779  // int ordering)
1780  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1781  // void *desired, int success_order,
1782  // int failure_order)
1783  //
1784  // The different signatures are built up depending on the
1785  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1786  // variables.
1787 
1788  AllocaInst *AllocaCASExpected = nullptr;
1789  Value *AllocaCASExpected_i8 = nullptr;
1790  AllocaInst *AllocaValue = nullptr;
1791  Value *AllocaValue_i8 = nullptr;
1792  AllocaInst *AllocaResult = nullptr;
1793  Value *AllocaResult_i8 = nullptr;
1794 
1795  Type *ResultTy;
1796  SmallVector<Value *, 6> Args;
1797  AttributeList Attr;
1798 
1799  // 'size' argument.
1800  if (!UseSizedLibcall) {
1801  // Note, getIntPtrType is assumed equivalent to size_t.
1802  Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1803  }
1804 
1805  // 'ptr' argument.
1806  // note: This assumes all address spaces share a common libfunc
1807  // implementation and that addresses are convertable. For systems without
1808  // that property, we'd need to extend this mechanism to support AS-specific
1809  // families of atomic intrinsics.
1810  auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
1811  Value *PtrVal = Builder.CreateBitCast(PointerOperand,
1812  Type::getInt8PtrTy(Ctx, PtrTypeAS));
1813  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
1814  Args.push_back(PtrVal);
1815 
1816  // 'expected' argument, if present.
1817  if (CASExpected) {
1818  AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1819  AllocaCASExpected->setAlignment(AllocaAlignment);
1820  unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
1821 
1822  AllocaCASExpected_i8 =
1823  Builder.CreateBitCast(AllocaCASExpected,
1824  Type::getInt8PtrTy(Ctx, AllocaAS));
1825  Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
1826  Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1827  Args.push_back(AllocaCASExpected_i8);
1828  }
1829 
1830  // 'val' argument ('desired' for cas), if present.
1831  if (ValueOperand) {
1832  if (UseSizedLibcall) {
1833  Value *IntValue =
1834  Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1835  Args.push_back(IntValue);
1836  } else {
1837  AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1838  AllocaValue->setAlignment(AllocaAlignment);
1839  AllocaValue_i8 =
1840  Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
1841  Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
1842  Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1843  Args.push_back(AllocaValue_i8);
1844  }
1845  }
1846 
1847  // 'ret' argument.
1848  if (!CASExpected && HasResult && !UseSizedLibcall) {
1849  AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1850  AllocaResult->setAlignment(AllocaAlignment);
1851  unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
1852  AllocaResult_i8 =
1853  Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
1854  Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
1855  Args.push_back(AllocaResult_i8);
1856  }
1857 
1858  // 'ordering' ('success_order' for cas) argument.
1859  Args.push_back(OrderingVal);
1860 
1861  // 'failure_order' argument, if present.
1862  if (Ordering2Val)
1863  Args.push_back(Ordering2Val);
1864 
1865  // Now, the return type.
1866  if (CASExpected) {
1867  ResultTy = Type::getInt1Ty(Ctx);
1868  Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
1869  } else if (HasResult && UseSizedLibcall)
1870  ResultTy = SizedIntTy;
1871  else
1872  ResultTy = Type::getVoidTy(Ctx);
1873 
1874  // Done with setting up arguments and return types, create the call:
1875  SmallVector<Type *, 6> ArgTys;
1876  for (Value *Arg : Args)
1877  ArgTys.push_back(Arg->getType());
1878  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1879  FunctionCallee LibcallFn =
1880  M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1881  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1882  Call->setAttributes(Attr);
1883  Value *Result = Call;
1884 
1885  // And then, extract the results...
1886  if (ValueOperand && !UseSizedLibcall)
1887  Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
1888 
1889  if (CASExpected) {
1890  // The final result from the CAS is {load of 'expected' alloca, bool result
1891  // from call}
1892  Type *FinalResultTy = I->getType();
1893  Value *V = UndefValue::get(FinalResultTy);
1894  Value *ExpectedOut = Builder.CreateAlignedLoad(
1895  CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
1896  Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
1897  V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1898  V = Builder.CreateInsertValue(V, Result, 1);
1899  I->replaceAllUsesWith(V);
1900  } else if (HasResult) {
1901  Value *V;
1902  if (UseSizedLibcall)
1903  V = Builder.CreateBitOrPointerCast(Result, I->getType());
1904  else {
1905  V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
1906  AllocaAlignment);
1907  Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
1908  }
1909  I->replaceAllUsesWith(V);
1910  }
1911  I->eraseFromParent();
1912  return true;
1913 }
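// End-to-end illustration (a sketch, assuming a 32-bit target whose largest
// legal integer type is 32 bits, so the generic form is chosen): a 16-byte
// atomic load such as
//   %v = load atomic i128, i128* %p seq_cst, align 16
// becomes, roughly,
//   %ret = alloca i128
//   call void @__atomic_load(i32 16, i8* %p.i8, i8* %ret.i8, i32 5)
//   %v = load i128, i128* %ret
// where %p.i8 and %ret.i8 are i8* bitcasts of the pointer and the temporary
// alloca, 5 is the C ABI encoding of seq_cst produced by toCABI, and
// lifetime markers are additionally emitted around the temporary.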