1 //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass (at IR level) to replace atomic instructions with
10 // __atomic_* library calls, or target-specific instructions which implement the
11 // same semantics in a way which better fits the target backend. This can
12 // include the use of (intrinsic-based) load-linked/store-conditional loops,
13 // AtomicCmpXchg, or type coercions.
14 //
15 //===----------------------------------------------------------------------===//
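//
// Illustrative sketch (generic; not the output of any particular backend): an
//   %old = atomicrmw add i32* %p, i32 %v seq_cst
// whose target requests a cmpxchg-based expansion becomes a retry loop of
// roughly this shape, where %init is a plain load of %p emitted before the
// loop (see insertRMWCmpXchgLoop further down for the exact form):
//   atomicrmw.start:
//     %loaded = phi i32 [ %init, %entry ], [ %new_loaded, %atomicrmw.start ]
//     %new = add i32 %loaded, %v
//     %pair = cmpxchg i32* %p, i32 %loaded, i32 %new seq_cst seq_cst
//     %new_loaded = extractvalue { i32, i1 } %pair, 0
//     %success = extractvalue { i32, i1 } %pair, 1
//     br i1 %success, label %atomicrmw.end, label %atomicrmw.start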
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/CodeGen/AtomicExpandUtils.h"
21 #include "llvm/CodeGen/RuntimeLibcalls.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetPassConfig.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/CodeGen/ValueTypes.h"
26 #include "llvm/IR/Attributes.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constant.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/InstIterator.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/IR/User.h"
40 #include "llvm/IR/Value.h"
41 #include "llvm/InitializePasses.h"
42 #include "llvm/Pass.h"
43 #include "llvm/Support/AtomicOrdering.h"
44 #include "llvm/Support/Casting.h"
45 #include "llvm/Support/Debug.h"
46 #include "llvm/Support/ErrorHandling.h"
47 #include "llvm/Support/raw_ostream.h"
48 #include "llvm/Target/TargetMachine.h"
49 #include <cassert>
50 #include <cstdint>
51 #include <iterator>
52 
53 using namespace llvm;
54 
55 #define DEBUG_TYPE "atomic-expand"
56 
57 namespace {
58 
59  class AtomicExpand: public FunctionPass {
60  const TargetLowering *TLI = nullptr;
61 
62  public:
63  static char ID; // Pass identification, replacement for typeid
64 
65  AtomicExpand() : FunctionPass(ID) {
66  initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
67  }
68 
69  bool runOnFunction(Function &F) override;
70 
71  private:
72  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
73  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
74  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
75  bool tryExpandAtomicLoad(LoadInst *LI);
76  bool expandAtomicLoadToLL(LoadInst *LI);
77  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
78  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
79  bool expandAtomicStore(StoreInst *SI);
80  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
81  Value *
82  insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
83  Align AddrAlign, AtomicOrdering MemOpOrder,
84  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
85  void expandAtomicOpToLLSC(
86  Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
87  AtomicOrdering MemOpOrder,
88  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
89  void expandPartwordAtomicRMW(
90  AtomicRMWInst *I,
91  TargetLoweringBase::AtomicExpansionKind ExpansionKind);
92  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
93  bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
94  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
95  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
96 
97  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
98  static Value *insertRMWCmpXchgLoop(
99  IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
100  AtomicOrdering MemOpOrder, SyncScope::ID SSID,
101  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
102  CreateCmpXchgInstFun CreateCmpXchg);
103  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
104 
105  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
106  bool isIdempotentRMW(AtomicRMWInst *RMWI);
107  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
108 
109  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
110  Value *PointerOperand, Value *ValueOperand,
111  Value *CASExpected, AtomicOrdering Ordering,
112  AtomicOrdering Ordering2,
113  ArrayRef<RTLIB::Libcall> Libcalls);
114  void expandAtomicLoadToLibcall(LoadInst *LI);
115  void expandAtomicStoreToLibcall(StoreInst *LI);
116  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
117  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
118 
119  friend bool
120  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
121  CreateCmpXchgInstFun CreateCmpXchg);
122  };
123 
124 } // end anonymous namespace
125 
126 char AtomicExpand::ID = 0;
127 
128 char &llvm::AtomicExpandID = AtomicExpand::ID;
129 
130 INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
131  false, false)
132 
133 FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
134 
135 // Helper functions to retrieve the size of atomic instructions.
136 static unsigned getAtomicOpSize(LoadInst *LI) {
137  const DataLayout &DL = LI->getModule()->getDataLayout();
138  return DL.getTypeStoreSize(LI->getType());
139 }
140 
141 static unsigned getAtomicOpSize(StoreInst *SI) {
142  const DataLayout &DL = SI->getModule()->getDataLayout();
143  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
144 }
145 
146 static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
147  const DataLayout &DL = RMWI->getModule()->getDataLayout();
148  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
149 }
150 
151 static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
152  const DataLayout &DL = CASI->getModule()->getDataLayout();
153  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
154 }
155 
156 // Determine if a particular atomic operation has a supported size,
157 // and is of appropriate alignment, to be passed through for target
158 // lowering. (Versus turning into a __atomic libcall)
159 template <typename Inst>
160 static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
161  unsigned Size = getAtomicOpSize(I);
162  Align Alignment = I->getAlign();
163  return Alignment >= Size &&
164  Size <= (TLI->getMaxAtomicSizeInBitsSupported() / 8);
165 }
166 
167 bool AtomicExpand::runOnFunction(Function &F) {
168  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
169  if (!TPC)
170  return false;
171 
172  auto &TM = TPC->getTM<TargetMachine>();
173  if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
174  return false;
175  TLI = TM.getSubtargetImpl(F)->getTargetLowering();
176 
177  SmallVector<Instruction *, 1> AtomicInsts;
178 
179  // Changing control-flow while iterating through it is a bad idea, so gather a
180  // list of all atomic instructions before we start.
181  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
182  Instruction *I = &*II;
183  if (I->isAtomic() && !isa<FenceInst>(I))
184  AtomicInsts.push_back(I);
185  }
186 
187  bool MadeChange = false;
188  for (auto I : AtomicInsts) {
189  auto LI = dyn_cast<LoadInst>(I);
190  auto SI = dyn_cast<StoreInst>(I);
191  auto RMWI = dyn_cast<AtomicRMWInst>(I);
192  auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
193  assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
194 
195  // If the Size/Alignment is not supported, replace with a libcall.
196  if (LI) {
197  if (!atomicSizeSupported(TLI, LI)) {
198  expandAtomicLoadToLibcall(LI);
199  MadeChange = true;
200  continue;
201  }
202  } else if (SI) {
203  if (!atomicSizeSupported(TLI, SI)) {
204  expandAtomicStoreToLibcall(SI);
205  MadeChange = true;
206  continue;
207  }
208  } else if (RMWI) {
209  if (!atomicSizeSupported(TLI, RMWI)) {
210  expandAtomicRMWToLibcall(RMWI);
211  MadeChange = true;
212  continue;
213  }
214  } else if (CASI) {
215  if (!atomicSizeSupported(TLI, CASI)) {
216  expandAtomicCASToLibcall(CASI);
217  MadeChange = true;
218  continue;
219  }
220  }
221 
222  if (TLI->shouldInsertFencesForAtomic(I)) {
223  auto FenceOrdering = AtomicOrdering::Monotonic;
224  if (LI && isAcquireOrStronger(LI->getOrdering())) {
225  FenceOrdering = LI->getOrdering();
226  LI->setOrdering(AtomicOrdering::Monotonic);
227  } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
228  FenceOrdering = SI->getOrdering();
229  SI->setOrdering(AtomicOrdering::Monotonic);
230  } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
231  isAcquireOrStronger(RMWI->getOrdering()))) {
232  FenceOrdering = RMWI->getOrdering();
233  RMWI->setOrdering(AtomicOrdering::Monotonic);
234  } else if (CASI &&
235  TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
236  TargetLoweringBase::AtomicExpansionKind::None &&
237  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
238  isAcquireOrStronger(CASI->getSuccessOrdering()))) {
239  // If a compare and swap is lowered to LL/SC, we can do smarter fence
240  // insertion, with a stronger one on the success path than on the
241  // failure path. As a result, fence insertion is directly done by
242  // expandAtomicCmpXchg in that case.
243  FenceOrdering = CASI->getSuccessOrdering();
244  CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
245  CASI->setFailureOrdering(AtomicOrdering::Monotonic);
246  }
247 
248  if (FenceOrdering != AtomicOrdering::Monotonic) {
249  MadeChange |= bracketInstWithFences(I, FenceOrdering);
250  }
251  }
252 
253  if (LI) {
254  if (LI->getType()->isFloatingPointTy()) {
255  // TODO: add a TLI hook to control this so that each target can
256  // convert to lowering the original type one at a time.
257  LI = convertAtomicLoadToIntegerType(LI);
258  assert(LI->getType()->isIntegerTy() && "invariant broken");
259  MadeChange = true;
260  }
261 
262  MadeChange |= tryExpandAtomicLoad(LI);
263  } else if (SI) {
264  if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
265  // TODO: add a TLI hook to control this so that each target can
266  // convert to lowering the original type one at a time.
267  SI = convertAtomicStoreToIntegerType(SI);
268  assert(SI->getValueOperand()->getType()->isIntegerTy() &&
269  "invariant broken");
270  MadeChange = true;
271  }
272 
273  if (TLI->shouldExpandAtomicStoreInIR(SI))
274  MadeChange |= expandAtomicStore(SI);
275  } else if (RMWI) {
276  // There are two different ways of expanding RMW instructions:
277  // - into a load if it is idempotent
278  // - into a Cmpxchg/LL-SC loop otherwise
279  // we try them in that order.
280 
281  if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
282  MadeChange = true;
283  } else {
284  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
285  unsigned ValueSize = getAtomicOpSize(RMWI);
286  AtomicRMWInst::BinOp Op = RMWI->getOperation();
287  if (ValueSize < MinCASSize &&
288  (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
289  Op == AtomicRMWInst::And)) {
290  RMWI = widenPartwordAtomicRMW(RMWI);
291  MadeChange = true;
292  }
293 
294  MadeChange |= tryExpandAtomicRMW(RMWI);
295  }
296  } else if (CASI) {
297  // TODO: when we're ready to make the change at the IR level, we can
298  // extend convertCmpXchgToInteger for floating point too.
299  assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
300  "unimplemented - floating point not legal at IR level");
301  if (CASI->getCompareOperand()->getType()->isPointerTy() ) {
302  // TODO: add a TLI hook to control this so that each target can
303  // convert to lowering the original type one at a time.
304  CASI = convertCmpXchgToIntegerType(CASI);
305  assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
306  "invariant broken");
307  MadeChange = true;
308  }
309 
310  MadeChange |= tryExpandAtomicCmpXchg(CASI);
311  }
312  }
313  return MadeChange;
314 }
315 
316 bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
317  IRBuilder<> Builder(I);
318 
319  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
320 
321  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
322  // We have a guard here because not every atomic operation generates a
323  // trailing fence.
324  if (TrailingFence)
325  TrailingFence->moveAfter(I);
326 
327  return (LeadingFence || TrailingFence);
328 }
329 
330 /// Get the iX type with the same bitwidth as T.
331 IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
332  const DataLayout &DL) {
333  EVT VT = TLI->getMemValueType(DL, T);
334  unsigned BitWidth = VT.getStoreSizeInBits();
335  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
336  return IntegerType::get(T->getContext(), BitWidth);
337 }
338 
339 /// Convert an atomic load of a non-integral type to an integer load of the
340 /// equivalent bitwidth. See the function comment on
341 /// convertAtomicStoreToIntegerType for background.
342 LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
343  auto *M = LI->getModule();
344  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
345  M->getDataLayout());
346 
347  IRBuilder<> Builder(LI);
348 
349  Value *Addr = LI->getPointerOperand();
350  Type *PT = PointerType::get(NewTy,
351  Addr->getType()->getPointerAddressSpace());
352  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
353 
354  auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
355  NewLI->setAlignment(LI->getAlign());
356  NewLI->setVolatile(LI->isVolatile());
357  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
358  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
359 
360  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
361  LI->replaceAllUsesWith(NewVal);
362  LI->eraseFromParent();
363  return NewLI;
364 }
365 
366 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
367  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
368  case TargetLoweringBase::AtomicExpansionKind::None:
369  return false;
370  case TargetLoweringBase::AtomicExpansionKind::LLSC:
371  expandAtomicOpToLLSC(
372  LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
373  LI->getOrdering(),
374  [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
375  return true;
376  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
377  return expandAtomicLoadToLL(LI);
378  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
379  return expandAtomicLoadToCmpXchg(LI);
380  default:
381  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
382  }
383 }
384 
385 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
386  IRBuilder<> Builder(LI);
387 
388  // On some architectures, load-linked instructions are atomic for larger
389  // sizes than normal loads. For example, the only 64-bit load guaranteed
390  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
391  Value *Val =
392  TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
393  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
394 
395  LI->replaceAllUsesWith(Val);
396  LI->eraseFromParent();
397 
398  return true;
399 }
400 
401 bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
402  IRBuilder<> Builder(LI);
403  AtomicOrdering Order = LI->getOrdering();
404  if (Order == AtomicOrdering::Unordered)
405  Order = AtomicOrdering::Monotonic;
406 
407  Value *Addr = LI->getPointerOperand();
408  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
409  Constant *DummyVal = Constant::getNullValue(Ty);
410 
411  Value *Pair = Builder.CreateAtomicCmpXchg(
412  Addr, DummyVal, DummyVal, LI->getAlign(), Order,
413  AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
414  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
415 
416  LI->replaceAllUsesWith(Loaded);
417  LI->eraseFromParent();
418 
419  return true;
420 }
421 
422 /// Convert an atomic store of a non-integral type to an integer store of the
423 /// equivalent bitwidth. We used to not support floating point or vector
424 /// atomics in the IR at all. The backends learned to deal with the bitcast
425 /// idiom because that was the only way of expressing the notion of an atomic
426 /// float or vector store. The long term plan is to teach each backend to
427 /// instruction select from the original atomic store, but as a migration
428 /// mechanism, we convert back to the old format which the backends understand.
429 /// Each backend will need individual work to recognize the new format.
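///
/// For example (illustrative only):
///   store atomic float %f, float* %p unordered, align 4
/// becomes
///   %f.int = bitcast float %f to i32
///   %p.int = bitcast float* %p to i32*
///   store atomic i32 %f.int, i32* %p.int unordered, align 4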
430 StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
431  IRBuilder<> Builder(SI);
432  auto *M = SI->getModule();
433  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
434  M->getDataLayout());
435  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
436 
437  Value *Addr = SI->getPointerOperand();
438  Type *PT = PointerType::get(NewTy,
439  Addr->getType()->getPointerAddressSpace());
440  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
441 
442  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
443  NewSI->setAlignment(SI->getAlign());
444  NewSI->setVolatile(SI->isVolatile());
445  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
446  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
447  SI->eraseFromParent();
448  return NewSI;
449 }
450 
451 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
452  // This function is only called on atomic stores that are too large to be
453  // atomic if implemented as a native store. So we replace them by an
454  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
455  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
456  // It is the responsibility of the target to only signal expansion via
457  // shouldExpandAtomicRMW in cases where this is required and possible.
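 // For instance (illustrative only), on a target that requests this expansion:
 //   store atomic i64 %v, i64* %p release, align 8
 // becomes
 //   atomicrmw xchg i64* %p, i64 %v release
 // whose result is unused and which is then handed to tryExpandAtomicRMW below.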
458  IRBuilder<> Builder(SI);
459  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
460  AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
461  SI->getAlign(), SI->getOrdering());
462  SI->eraseFromParent();
463 
464  // Now we have an appropriate swap instruction, lower it as usual.
465  return tryExpandAtomicRMW(AI);
466 }
467 
468 static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
469  Value *Loaded, Value *NewVal, Align AddrAlign,
470  AtomicOrdering MemOpOrder, SyncScope::ID SSID,
471  Value *&Success, Value *&NewLoaded) {
472  Type *OrigTy = NewVal->getType();
473 
474  // This code can go away when cmpxchg supports FP types.
475  bool NeedBitcast = OrigTy->isFloatingPointTy();
476  if (NeedBitcast) {
477  IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
478  unsigned AS = Addr->getType()->getPointerAddressSpace();
479  Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
480  NewVal = Builder.CreateBitCast(NewVal, IntTy);
481  Loaded = Builder.CreateBitCast(Loaded, IntTy);
482  }
483 
484  Value *Pair = Builder.CreateAtomicCmpXchg(
485  Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
486  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
487  Success = Builder.CreateExtractValue(Pair, 1, "success");
488  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
489 
490  if (NeedBitcast)
491  NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
492 }
493 
494 /// Emit IR to implement the given atomicrmw operation on values in registers,
495 /// returning the new value.
496 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
497  Value *Loaded, Value *Inc) {
498  Value *NewVal;
499  switch (Op) {
500  case AtomicRMWInst::Xchg:
501  return Inc;
502  case AtomicRMWInst::Add:
503  return Builder.CreateAdd(Loaded, Inc, "new");
504  case AtomicRMWInst::Sub:
505  return Builder.CreateSub(Loaded, Inc, "new");
506  case AtomicRMWInst::And:
507  return Builder.CreateAnd(Loaded, Inc, "new");
508  case AtomicRMWInst::Nand:
509  return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
510  case AtomicRMWInst::Or:
511  return Builder.CreateOr(Loaded, Inc, "new");
512  case AtomicRMWInst::Xor:
513  return Builder.CreateXor(Loaded, Inc, "new");
514  case AtomicRMWInst::Max:
515  NewVal = Builder.CreateICmpSGT(Loaded, Inc);
516  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
517  case AtomicRMWInst::Min:
518  NewVal = Builder.CreateICmpSLE(Loaded, Inc);
519  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
520  case AtomicRMWInst::UMax:
521  NewVal = Builder.CreateICmpUGT(Loaded, Inc);
522  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
523  case AtomicRMWInst::UMin:
524  NewVal = Builder.CreateICmpULE(Loaded, Inc);
525  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
526  case AtomicRMWInst::FAdd:
527  return Builder.CreateFAdd(Loaded, Inc, "new");
528  case AtomicRMWInst::FSub:
529  return Builder.CreateFSub(Loaded, Inc, "new");
530  default:
531  llvm_unreachable("Unknown atomic op");
532  }
533 }
534 
535 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
536  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
537  case TargetLoweringBase::AtomicExpansionKind::None:
538  return false;
539  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
540  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
541  unsigned ValueSize = getAtomicOpSize(AI);
542  if (ValueSize < MinCASSize) {
543  expandPartwordAtomicRMW(AI,
544  TargetLoweringBase::AtomicExpansionKind::LLSC);
545  } else {
546  auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
547  return performAtomicOp(AI->getOperation(), Builder, Loaded,
548  AI->getValOperand());
549  };
550  expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
551  AI->getAlign(), AI->getOrdering(), PerformOp);
552  }
553  return true;
554  }
555  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
556  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
557  unsigned ValueSize = getAtomicOpSize(AI);
558  if (ValueSize < MinCASSize) {
559  // TODO: Handle atomicrmw fadd/fsub
560  if (AI->getType()->isFloatingPointTy())
561  return false;
562 
563  expandPartwordAtomicRMW(AI,
564  TargetLoweringBase::AtomicExpansionKind::CmpXChg);
565  } else {
566  expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
567  }
568  return true;
569  }
570  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
571  expandAtomicRMWToMaskedIntrinsic(AI);
572  return true;
573  }
574  default:
575  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
576  }
577 }
578 
579 namespace {
580 
581 struct PartwordMaskValues {
582  // These three fields are guaranteed to be set by createMaskInstrs.
583  Type *WordType = nullptr;
584  Type *ValueType = nullptr;
585  Value *AlignedAddr = nullptr;
586  Align AlignedAddrAlignment;
587  // The remaining fields can be null.
588  Value *ShiftAmt = nullptr;
589  Value *Mask = nullptr;
590  Value *Inv_Mask = nullptr;
591 };
592 
593 LLVM_ATTRIBUTE_UNUSED
594 raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
595  auto PrintObj = [&O](auto *V) {
596  if (V)
597  O << *V;
598  else
599  O << "nullptr";
600  O << '\n';
601  };
602  O << "PartwordMaskValues {\n";
603  O << " WordType: ";
604  PrintObj(PMV.WordType);
605  O << " ValueType: ";
606  PrintObj(PMV.ValueType);
607  O << " AlignedAddr: ";
608  PrintObj(PMV.AlignedAddr);
609  O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
610  O << " ShiftAmt: ";
611  PrintObj(PMV.ShiftAmt);
612  O << " Mask: ";
613  PrintObj(PMV.Mask);
614  O << " Inv_Mask: ";
615  PrintObj(PMV.Inv_Mask);
616  O << "}\n";
617  return O;
618 }
619 
620 } // end anonymous namespace
621 
622 /// This is a helper function which builds instructions to provide
623 /// values necessary for partword atomic operations. It takes an
624 /// incoming address, Addr, and ValueType, and constructs the address,
625 /// shift-amounts and masks needed to work with a larger value of size
626 /// WordSize.
627 ///
628 /// AlignedAddr: Addr rounded down to a multiple of WordSize
629 ///
630 /// ShiftAmt: Number of bits to right-shift a WordSize value loaded
631 /// from AlignAddr for it to have the same value as if
632 /// ValueType was loaded from Addr.
633 ///
634 /// Mask: Value to mask with the value loaded from AlignAddr to
635 /// include only the part that would've been loaded from Addr.
636 ///
637 /// Inv_Mask: The inverse of Mask.
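///
/// Worked example (assuming a little-endian target and MinWordSize == 4) for
/// an i8 value whose address A has A % 4 == 3:
///   AlignedAddr = A & ~3          ; the containing i32
///   ShiftAmt    = (A & 3) * 8     ; == 24
///   Mask        = 0xFF << 24      ; == 0xFF000000
///   Inv_Mask    = ~Mask           ; == 0x00FFFFFF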
638 static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
639  Type *ValueType, Value *Addr,
640  Align AddrAlign,
641  unsigned MinWordSize) {
642  PartwordMaskValues PMV;
643 
644  Module *M = I->getModule();
645  LLVMContext &Ctx = M->getContext();
646  const DataLayout &DL = M->getDataLayout();
647  unsigned ValueSize = DL.getTypeStoreSize(ValueType);
648 
649  PMV.ValueType = ValueType;
650  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
651  : ValueType;
652  if (PMV.ValueType == PMV.WordType) {
653  PMV.AlignedAddr = Addr;
654  PMV.AlignedAddrAlignment = AddrAlign;
655  return PMV;
656  }
657 
658  assert(ValueSize < MinWordSize);
659 
660  Type *WordPtrType =
661  PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
662 
663  // TODO: we could skip some of this if AddrAlign >= MinWordSize.
664  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
665  PMV.AlignedAddr = Builder.CreateIntToPtr(
666  Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType,
667  "AlignedAddr");
668  PMV.AlignedAddrAlignment = Align(MinWordSize);
669 
670  Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
671  if (DL.isLittleEndian()) {
672  // turn bytes into bits
673  PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
674  } else {
675  // turn bytes into bits, and count from the other side.
676  PMV.ShiftAmt = Builder.CreateShl(
677  Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
678  }
679 
680  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
681  PMV.Mask = Builder.CreateShl(
682  ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
683  "Mask");
684  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
685  return PMV;
686 }
687 
688 static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord,
689  const PartwordMaskValues &PMV) {
690  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
691  if (PMV.WordType == PMV.ValueType)
692  return WideWord;
693 
694  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
695  Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted");
696  return Trunc;
697 }
698 
699 static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord,
700  Value *Updated, const PartwordMaskValues &PMV) {
701  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
702  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
703  if (PMV.WordType == PMV.ValueType)
704  return Updated;
705 
706  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
707  Value *Shift =
708  Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
709  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
710  Value *Or = Builder.CreateOr(And, Shift, "inserted");
711  return Or;
712 }
713 
714 /// Emit IR to implement a masked version of a given atomicrmw
715 /// operation. (That is, only the bits under the Mask should be
716 /// affected by the operation)
717 static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
718  IRBuilder<> &Builder, Value *Loaded,
719  Value *Shifted_Inc, Value *Inc,
720  const PartwordMaskValues &PMV) {
721  // TODO: update to use
722  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
723  // to merge bits from two values without requiring PMV.Inv_Mask.
724  switch (Op) {
725  case AtomicRMWInst::Xchg: {
726  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
727  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
728  return FinalVal;
729  }
730  case AtomicRMWInst::Or:
731  case AtomicRMWInst::Xor:
732  case AtomicRMWInst::And:
733  llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
734  case AtomicRMWInst::Add:
735  case AtomicRMWInst::Sub:
736  case AtomicRMWInst::Nand: {
737  // The other arithmetic ops need to be masked into place.
738  Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
739  Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
740  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
741  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
742  return FinalVal;
743  }
744  case AtomicRMWInst::Max:
745  case AtomicRMWInst::Min:
746  case AtomicRMWInst::UMax:
747  case AtomicRMWInst::UMin: {
748  // Finally, comparison ops will operate on the full value, so
749  // truncate down to the original size, and expand out again after
750  // doing the operation.
751  Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
752  Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc);
753  Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
754  return FinalVal;
755  }
756  default:
757  llvm_unreachable("Unknown atomic op");
758  }
759 }
760 
761 /// Expand a sub-word atomicrmw operation into an appropriate
762 /// word-sized operation.
763 ///
764 /// It will create an LL/SC or cmpxchg loop, as appropriate, the same
765 /// way as a typical atomicrmw expansion. The only difference here is
766 /// that the operation inside of the loop may operate upon only a
767 /// part of the value.
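///
/// For example (sketch only), on a target whose narrowest cmpxchg is 32 bits,
///   %old = atomicrmw add i16* %p, i16 %v monotonic
/// becomes a loop over the containing 32-bit word in which only the 16 bits
/// selected by PMV.Mask are updated; the i16 result is recovered from the old
/// word with a lshr/trunc (see extractMaskedValue above).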
768 void AtomicExpand::expandPartwordAtomicRMW(
769  AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
770  AtomicOrdering MemOpOrder = AI->getOrdering();
771  SyncScope::ID SSID = AI->getSyncScopeID();
772 
773  IRBuilder<> Builder(AI);
774 
775  PartwordMaskValues PMV =
776  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
777  AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
778 
779  Value *ValOperand_Shifted =
780  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
781  PMV.ShiftAmt, "ValOperand_Shifted");
782 
783  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
784  return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
785  ValOperand_Shifted, AI->getValOperand(), PMV);
786  };
787 
788  Value *OldResult;
789  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
790  OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
791  PMV.AlignedAddrAlignment, MemOpOrder,
792  SSID, PerformPartwordOp,
793  createCmpXchgInstFun);
794  } else {
795  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
796  OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
797  PMV.AlignedAddrAlignment, MemOpOrder,
798  PerformPartwordOp);
799  }
800 
801  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
802  AI->replaceAllUsesWith(FinalOldResult);
803  AI->eraseFromParent();
804 }
805 
806 // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
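//
// For example (illustrative, little-endian, 32-bit minimum cmpxchg width):
//   %old = atomicrmw or i8* %p, i8 %v monotonic
// becomes
//   %v.ext     = zext i8 %v to i32
//   %v.shifted = shl i32 %v.ext, %ShiftAmt
//   %wide      = atomicrmw or i32* %AlignedAddr, i32 %v.shifted monotonic
// followed by a lshr/trunc of %wide to recover the i8 result. For 'and', the
// shifted operand is additionally or'ed with Inv_Mask so that the bytes
// outside the i8 are left unchanged.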
807 AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
808  IRBuilder<> Builder(AI);
809  AtomicRMWInst::BinOp Op = AI->getOperation();
810 
811  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
812  Op == AtomicRMWInst::And) &&
813  "Unable to widen operation");
814 
815  PartwordMaskValues PMV =
816  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
817  AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
818 
819  Value *ValOperand_Shifted =
820  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
821  PMV.ShiftAmt, "ValOperand_Shifted");
822 
823  Value *NewOperand;
824 
825  if (Op == AtomicRMWInst::And)
826  NewOperand =
827  Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
828  else
829  NewOperand = ValOperand_Shifted;
830 
831  AtomicRMWInst *NewAI =
832  Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand,
833  PMV.AlignedAddrAlignment, AI->getOrdering());
834 
835  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
836  AI->replaceAllUsesWith(FinalOldResult);
837  AI->eraseFromParent();
838  return NewAI;
839 }
840 
841 bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
842  // The basic idea here is that we're expanding a cmpxchg of a
843  // smaller memory size up to a word-sized cmpxchg. To do this, we
844  // need to add a retry-loop for strong cmpxchg, so that
845  // modifications to other parts of the word don't cause a spurious
846  // failure.
847 
848  // This generates code like the following:
849  // [[Setup mask values PMV.*]]
850  // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
851  // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
852  // %InitLoaded = load i32* %addr
853  // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
854  // br partword.cmpxchg.loop
855  // partword.cmpxchg.loop:
856  // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
857  // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
858  // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
859  // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
860  // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
861  // i32 %FullWord_NewVal success_ordering failure_ordering
862  // %OldVal = extractvalue { i32, i1 } %NewCI, 0
863  // %Success = extractvalue { i32, i1 } %NewCI, 1
864  // br i1 %Success, label %partword.cmpxchg.end,
865  // label %partword.cmpxchg.failure
866  // partword.cmpxchg.failure:
867  // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
868  // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
869  // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
870  // label %partword.cmpxchg.end
871  // partword.cmpxchg.end:
872  // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
873  // %FinalOldVal = trunc i32 %tmp1 to i8
874  // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
875  // %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
876 
877  Value *Addr = CI->getPointerOperand();
878  Value *Cmp = CI->getCompareOperand();
879  Value *NewVal = CI->getNewValOperand();
880 
881  BasicBlock *BB = CI->getParent();
882  Function *F = BB->getParent();
883  IRBuilder<> Builder(CI);
884  LLVMContext &Ctx = Builder.getContext();
885 
886  BasicBlock *EndBB =
887  BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
888  auto FailureBB =
889  BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
890  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
891 
892  // The split call above "helpfully" added a branch at the end of BB
893  // (to the wrong place).
894  std::prev(BB->end())->eraseFromParent();
895  Builder.SetInsertPoint(BB);
896 
897  PartwordMaskValues PMV =
898  createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
899  CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
900 
901  // Shift the incoming values over, into the right location in the word.
902  Value *NewVal_Shifted =
903  Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
904  Value *Cmp_Shifted =
905  Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
906 
907  // Load the entire current word, and mask into place the expected and new
908  // values
909  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
910  InitLoaded->setVolatile(CI->isVolatile());
911  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
912  Builder.CreateBr(LoopBB);
913 
914  // partword.cmpxchg.loop:
915  Builder.SetInsertPoint(LoopBB);
916  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
917  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
918 
919  // Mask/Or the expected and new values into place in the loaded word.
920  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
921  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
922  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
923  PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
924  CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
925  NewCI->setVolatile(CI->isVolatile());
926  // When we're building a strong cmpxchg, we need a loop, so you
927  // might think we could use a weak cmpxchg inside. But, using strong
928  // allows the below comparison for ShouldContinue, and we're
929  // expecting the underlying cmpxchg to be a machine instruction,
930  // which is strong anyways.
931  NewCI->setWeak(CI->isWeak());
932 
933  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
934  Value *Success = Builder.CreateExtractValue(NewCI, 1);
935 
936  if (CI->isWeak())
937  Builder.CreateBr(EndBB);
938  else
939  Builder.CreateCondBr(Success, EndBB, FailureBB);
940 
941  // partword.cmpxchg.failure:
942  Builder.SetInsertPoint(FailureBB);
943  // Upon failure, verify that the masked-out part of the loaded value
944  // has been modified. If it didn't, abort the cmpxchg, since the
945  // masked-in part must've.
946  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
947  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
948  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
949 
950  // Add the second value to the phi from above
951  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
952 
953  // partword.cmpxchg.end:
954  Builder.SetInsertPoint(CI);
955 
956  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
957  Value *Res = UndefValue::get(CI->getType());
958  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
959  Res = Builder.CreateInsertValue(Res, Success, 1);
960 
961  CI->replaceAllUsesWith(Res);
962  CI->eraseFromParent();
963  return true;
964 }
965 
966 void AtomicExpand::expandAtomicOpToLLSC(
967  Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
968  AtomicOrdering MemOpOrder,
969  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
970  IRBuilder<> Builder(I);
971  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
972  MemOpOrder, PerformOp);
973 
974  I->replaceAllUsesWith(Loaded);
975  I->eraseFromParent();
976 }
977 
978 void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
979  IRBuilder<> Builder(AI);
980 
981  PartwordMaskValues PMV =
982  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
983  AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
984 
985  // The value operand must be sign-extended for signed min/max so that the
986  // target's signed comparison instructions can be used. Otherwise, just
987  // zero-ext.
988  Instruction::CastOps CastOp = Instruction::ZExt;
989  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
990  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
991  CastOp = Instruction::SExt;
992 
993  Value *ValOperand_Shifted = Builder.CreateShl(
994  Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
995  PMV.ShiftAmt, "ValOperand_Shifted");
996  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
997  Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
998  AI->getOrdering());
999  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1000  AI->replaceAllUsesWith(FinalOldResult);
1001  AI->eraseFromParent();
1002 }
1003 
1004 void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
1005  IRBuilder<> Builder(CI);
1006 
1007  PartwordMaskValues PMV = createMaskInstrs(
1008  Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1009  CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1010 
1011  Value *CmpVal_Shifted = Builder.CreateShl(
1012  Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1013  "CmpVal_Shifted");
1014  Value *NewVal_Shifted = Builder.CreateShl(
1015  Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1016  "NewVal_Shifted");
1017  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1018  Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1019  CI->getSuccessOrdering());
1020  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1021  Value *Res = UndefValue::get(CI->getType());
1022  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1023  Value *Success = Builder.CreateICmpEQ(
1024  CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1025  Res = Builder.CreateInsertValue(Res, Success, 1);
1026 
1027  CI->replaceAllUsesWith(Res);
1028  CI->eraseFromParent();
1029 }
1030 
1031 Value *AtomicExpand::insertRMWLLSCLoop(
1032  IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1033  AtomicOrdering MemOpOrder,
1034  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
1035  LLVMContext &Ctx = Builder.getContext();
1036  BasicBlock *BB = Builder.GetInsertBlock();
1037  Function *F = BB->getParent();
1038 
1039  assert(AddrAlign >=
1040  F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1041  "Expected at least natural alignment at this point.");
1042 
1043  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1044  //
1045  // The standard expansion we produce is:
1046  // [...]
1047  // atomicrmw.start:
1048  // %loaded = @load.linked(%addr)
1049  // %new = some_op iN %loaded, %incr
1050  // %stored = @store_conditional(%new, %addr)
1051  // %try_again = icmp i32 ne %stored, 0
1052  // br i1 %try_again, label %loop, label %atomicrmw.end
1053  // atomicrmw.end:
1054  // [...]
1055  BasicBlock *ExitBB =
1056  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1057  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1058 
1059  // The split call above "helpfully" added a branch at the end of BB (to the
1060  // wrong place).
1061  std::prev(BB->end())->eraseFromParent();
1062  Builder.SetInsertPoint(BB);
1063  Builder.CreateBr(LoopBB);
1064 
1065  // Start the main loop block now that we've taken care of the preliminaries.
1066  Builder.SetInsertPoint(LoopBB);
1067  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1068 
1069  Value *NewVal = PerformOp(Builder, Loaded);
1070 
1071  Value *StoreSuccess =
1072  TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1073  Value *TryAgain = Builder.CreateICmpNE(
1074  StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1075  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1076 
1077  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1078  return Loaded;
1079 }
1080 
1081 /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1082 /// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1083 /// IR. As a migration step, we convert back to what used to be the standard
1084 /// way to represent a pointer cmpxchg so that we can update backends one by
1085 /// one.
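///
/// Roughly (illustrative, 64-bit pointers):
///   %r = cmpxchg i8** %p, i8* %cmp, i8* %new seq_cst seq_cst
/// becomes
///   %p.int   = bitcast i8** %p to i64*
///   %cmp.int = ptrtoint i8* %cmp to i64
///   %new.int = ptrtoint i8* %new to i64
///   %r.int   = cmpxchg i64* %p.int, i64 %cmp.int, i64 %new.int seq_cst seq_cst
/// with the loaded i64 converted back to a pointer via inttoptr for the
/// original users.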
1086 AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1087  auto *M = CI->getModule();
1088  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1089  M->getDataLayout());
1090 
1091  IRBuilder<> Builder(CI);
1092 
1093  Value *Addr = CI->getPointerOperand();
1094  Type *PT = PointerType::get(NewTy,
1095  Addr->getType()->getPointerAddressSpace());
1096  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
1097 
1098  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1099  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1100 
1101  auto *NewCI = Builder.CreateAtomicCmpXchg(
1102  NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1103  CI->getFailureOrdering(), CI->getSyncScopeID());
1104  NewCI->setVolatile(CI->isVolatile());
1105  NewCI->setWeak(CI->isWeak());
1106  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1107 
1108  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1109  Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1110 
1111  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1112 
1113  Value *Res = UndefValue::get(CI->getType());
1114  Res = Builder.CreateInsertValue(Res, OldVal, 0);
1115  Res = Builder.CreateInsertValue(Res, Succ, 1);
1116 
1117  CI->replaceAllUsesWith(Res);
1118  CI->eraseFromParent();
1119  return NewCI;
1120 }
1121 
1122 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1123  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1124  AtomicOrdering FailureOrder = CI->getFailureOrdering();
1125  Value *Addr = CI->getPointerOperand();
1126  BasicBlock *BB = CI->getParent();
1127  Function *F = BB->getParent();
1128  LLVMContext &Ctx = F->getContext();
1129  // If shouldInsertFencesForAtomic() returns true, then the target does not
1130  // want to deal with memory orders, and emitLeading/TrailingFence should take
1131  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1132  // should preserve the ordering.
1133  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1134  AtomicOrdering MemOpOrder =
1135  ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
1136 
1137  // In implementations which use a barrier to achieve release semantics, we can
1138  // delay emitting this barrier until we know a store is actually going to be
1139  // attempted. The cost of this delay is that we need 2 copies of the block
1140  // emitting the load-linked, affecting code size.
1141  //
1142  // Ideally, this logic would be unconditional except for the minsize check
1143  // since in other cases the extra blocks naturally collapse down to the
1144  // minimal loop. Unfortunately, this puts too much stress on later
1145  // optimisations so we avoid emitting the extra logic in those cases too.
1146  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1147  SuccessOrder != AtomicOrdering::Monotonic &&
1148  SuccessOrder != AtomicOrdering::Acquire &&
1149  !F->hasMinSize();
1150 
1151  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1152  // do it even on minsize.
1153  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1154 
1155  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1156  //
1157  // The full expansion we produce is:
1158  // [...]
1159  // %aligned.addr = ...
1160  // cmpxchg.start:
1161  // %unreleasedload = @load.linked(%aligned.addr)
1162  // %unreleasedload.extract = extract value from %unreleasedload
1163  // %should_store = icmp eq %unreleasedload.extract, %desired
1164  // br i1 %should_store, label %cmpxchg.releasingstore,
1165  // label %cmpxchg.nostore
1166  // cmpxchg.releasingstore:
1167  // fence?
1168  // br label cmpxchg.trystore
1169  // cmpxchg.trystore:
1170  // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1171  // [%releasedload, %cmpxchg.releasedload]
1172  // %updated.new = insert %new into %loaded.trystore
1173  // %stored = @store_conditional(%updated.new, %aligned.addr)
1174  // %success = icmp eq i32 %stored, 0
1175  // br i1 %success, label %cmpxchg.success,
1176  // label %cmpxchg.releasedload/%cmpxchg.failure
1177  // cmpxchg.releasedload:
1178  // %releasedload = @load.linked(%aligned.addr)
1179  // %releasedload.extract = extract value from %releasedload
1180  // %should_store = icmp eq %releasedload.extract, %desired
1181  // br i1 %should_store, label %cmpxchg.trystore,
1182  // label %cmpxchg.failure
1183  // cmpxchg.success:
1184  // fence?
1185  // br label %cmpxchg.end
1186  // cmpxchg.nostore:
1187  // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1188  // [%releasedload,
1189  // %cmpxchg.releasedload/%cmpxchg.trystore]
1190  // @load_linked_fail_balance()?
1191  // br label %cmpxchg.failure
1192  // cmpxchg.failure:
1193  // fence?
1194  // br label %cmpxchg.end
1195  // cmpxchg.end:
1196  // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1197  // [%loaded.trystore, %cmpxchg.trystore]
1198  // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1199  // %loaded = extract value from %loaded.exit
1200  // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1201  // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1202  // [...]
1203  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1204  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1205  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1206  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1207  auto ReleasedLoadBB =
1208  BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1209  auto TryStoreBB =
1210  BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1211  auto ReleasingStoreBB =
1212  BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1213  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1214 
1215  // This grabs the DebugLoc from CI
1216  IRBuilder<> Builder(CI);
1217 
1218  // The split call above "helpfully" added a branch at the end of BB (to the
1219  // wrong place), but we might want a fence too. It's easiest to just remove
1220  // the branch entirely.
1221  std::prev(BB->end())->eraseFromParent();
1222  Builder.SetInsertPoint(BB);
1223  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1224  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1225 
1226  PartwordMaskValues PMV =
1227  createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1228  CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1229  Builder.CreateBr(StartBB);
1230 
1231  // Start the main loop block now that we've taken care of the preliminaries.
1232  Builder.SetInsertPoint(StartBB);
1233  Value *UnreleasedLoad =
1234  TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
1235  Value *UnreleasedLoadExtract =
1236  extractMaskedValue(Builder, UnreleasedLoad, PMV);
1237  Value *ShouldStore = Builder.CreateICmpEQ(
1238  UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1239 
1240  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1241  // jump straight past that fence instruction (if it exists).
1242  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1243 
1244  Builder.SetInsertPoint(ReleasingStoreBB);
1245  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1246  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1247  Builder.CreateBr(TryStoreBB);
1248 
1249  Builder.SetInsertPoint(TryStoreBB);
1250  PHINode *LoadedTryStore =
1251  Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1252  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1253  Value *NewValueInsert =
1254  insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1255  Value *StoreSuccess =
1256  TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr,
1257  MemOpOrder);
1258  StoreSuccess = Builder.CreateICmpEQ(
1259  StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1260  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1261  Builder.CreateCondBr(StoreSuccess, SuccessBB,
1262  CI->isWeak() ? FailureBB : RetryBB);
1263 
1264  Builder.SetInsertPoint(ReleasedLoadBB);
1265  Value *SecondLoad;
1266  if (HasReleasedLoadBB) {
1267  SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
1268  Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1269  ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1270  CI->getCompareOperand(), "should_store");
1271 
1272  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1273  // jump straight past that fence instruction (if it exists).
1274  Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1275  // Update PHI node in TryStoreBB.
1276  LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1277  } else
1278  Builder.CreateUnreachable();
1279 
1280  // Make sure later instructions don't get reordered with a fence if
1281  // necessary.
1282  Builder.SetInsertPoint(SuccessBB);
1283  if (ShouldInsertFencesForAtomic)
1284  TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1285  Builder.CreateBr(ExitBB);
1286 
1287  Builder.SetInsertPoint(NoStoreBB);
1288  PHINode *LoadedNoStore =
1289  Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1290  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1291  if (HasReleasedLoadBB)
1292  LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1293 
1294  // In the failing case, where we don't execute the store-conditional, the
1295  // target might want to balance out the load-linked with a dedicated
1296  // instruction (e.g., on ARM, clearing the exclusive monitor).
1297  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1298  Builder.CreateBr(FailureBB);
1299 
1300  Builder.SetInsertPoint(FailureBB);
1301  PHINode *LoadedFailure =
1302  Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1303  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1304  if (CI->isWeak())
1305  LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1306  if (ShouldInsertFencesForAtomic)
1307  TLI->emitTrailingFence(Builder, CI, FailureOrder);
1308  Builder.CreateBr(ExitBB);
1309 
1310  // Finally, we have control-flow based knowledge of whether the cmpxchg
1311  // succeeded or not. We expose this to later passes by converting any
1312  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1313  // PHI.
1314  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1315  PHINode *LoadedExit =
1316  Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1317  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1318  LoadedExit->addIncoming(LoadedFailure, FailureBB);
1319  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1320  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1321  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1322 
1323  // This is the "exit value" from the cmpxchg expansion. It may be of
1324  // a type wider than the one in the cmpxchg instruction.
1325  Value *LoadedFull = LoadedExit;
1326 
1327  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1328  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1329 
1330  // Look for any users of the cmpxchg that are just comparing the loaded value
1331  // against the desired one, and replace them with the CFG-derived version.
1332  SmallVector<ExtractValueInst *, 2> PrunedInsts;
1333  for (auto User : CI->users()) {
1334  ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1335  if (!EV)
1336  continue;
1337 
1338  assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1339  "weird extraction from { iN, i1 }");
1340 
1341  if (EV->getIndices()[0] == 0)
1342  EV->replaceAllUsesWith(Loaded);
1343  else
1344  EV->replaceAllUsesWith(Success);
1345 
1346  PrunedInsts.push_back(EV);
1347  }
1348 
1349  // We can remove the instructions now we're no longer iterating through them.
1350  for (auto EV : PrunedInsts)
1351  EV->eraseFromParent();
1352 
1353  if (!CI->use_empty()) {
1354  // Some use of the full struct return that we don't understand has happened,
1355  // so we've got to reconstruct it properly.
1356  Value *Res;
1357  Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
1358  Res = Builder.CreateInsertValue(Res, Success, 1);
1359 
1360  CI->replaceAllUsesWith(Res);
1361  }
1362 
1363  CI->eraseFromParent();
1364  return true;
1365 }
1366 
1367 bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
1368  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1369  if(!C)
1370  return false;
1371 
1372  AtomicRMWInst::BinOp Op = RMWI->getOperation();
1373  switch(Op) {
1374  case AtomicRMWInst::Add:
1375  case AtomicRMWInst::Sub:
1376  case AtomicRMWInst::Or:
1377  case AtomicRMWInst::Xor:
1378  return C->isZero();
1379  case AtomicRMWInst::And:
1380  return C->isMinusOne();
1381  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1382  default:
1383  return false;
1384  }
1385 }
1386 
1387 bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
1388  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1389  tryExpandAtomicLoad(ResultingLoad);
1390  return true;
1391  }
1392  return false;
1393 }
1394 
1395 Value *AtomicExpand::insertRMWCmpXchgLoop(
1396  IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1397  AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1398  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
1399  CreateCmpXchgInstFun CreateCmpXchg) {
1400  LLVMContext &Ctx = Builder.getContext();
1401  BasicBlock *BB = Builder.GetInsertBlock();
1402  Function *F = BB->getParent();
1403 
1404  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1405  //
1406  // The standard expansion we produce is:
1407  // [...]
1408  // %init_loaded = load atomic iN* %addr
1409  // br label %loop
1410  // loop:
1411  // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1412  // %new = some_op iN %loaded, %incr
1413  // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1414  // %new_loaded = extractvalue { iN, i1 } %pair, 0
1415  // %success = extractvalue { iN, i1 } %pair, 1
1416  // br i1 %success, label %atomicrmw.end, label %loop
1417  // atomicrmw.end:
1418  // [...]
1419  BasicBlock *ExitBB =
1420  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1421  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1422 
1423  // The split call above "helpfully" added a branch at the end of BB (to the
1424  // wrong place), but we want a load. It's easiest to just remove
1425  // the branch entirely.
1426  std::prev(BB->end())->eraseFromParent();
1427  Builder.SetInsertPoint(BB);
1428  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1429  Builder.CreateBr(LoopBB);
1430 
1431  // Start the main loop block now that we've taken care of the preliminaries.
1432  Builder.SetInsertPoint(LoopBB);
1433  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1434  Loaded->addIncoming(InitLoaded, BB);
1435 
1436  Value *NewVal = PerformOp(Builder, Loaded);
1437 
1438  Value *NewLoaded = nullptr;
1439  Value *Success = nullptr;
1440 
1441  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1442  MemOpOrder == AtomicOrdering::Unordered
1443  ? AtomicOrdering::Monotonic
1444  : MemOpOrder,
1445  SSID, Success, NewLoaded);
1446  assert(Success && NewLoaded);
1447 
1448  Loaded->addIncoming(NewLoaded, LoopBB);
1449 
1450  Builder.CreateCondBr(Success, ExitBB, LoopBB);
1451 
1452  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1453  return NewLoaded;
1454 }
1455 
1456 bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1457  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1458  unsigned ValueSize = getAtomicOpSize(CI);
1459 
1460  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1461  default:
1462  llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1463  case TargetLoweringBase::AtomicExpansionKind::None:
1464  if (ValueSize < MinCASSize)
1465  return expandPartwordCmpXchg(CI);
1466  return false;
1467  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1468  return expandAtomicCmpXchg(CI);
1469  }
1470  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1471  expandAtomicCmpXchgToMaskedIntrinsic(CI);
1472  return true;
1473  }
1474 }
1475 
1476 // Note: This function is exposed externally by AtomicExpandUtils.h
1477 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1478  CreateCmpXchgInstFun CreateCmpXchg) {
1479  IRBuilder<> Builder(AI);
1480  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
1481  Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1482  AI->getOrdering(), AI->getSyncScopeID(),
1483  [&](IRBuilder<> &Builder, Value *Loaded) {
1484  return performAtomicOp(AI->getOperation(), Builder, Loaded,
1485  AI->getValOperand());
1486  },
1487  CreateCmpXchg);
1488 
1489  AI->replaceAllUsesWith(Loaded);
1490  AI->eraseFromParent();
1491  return true;
1492 }
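// Illustrative only (hypothetical caller, not part of this file): a target
// that wants the plain IR expansion can call
//   expandAtomicRMWToCmpXchg(RMWI, createCmpXchgInstFun);
// or pass its own CreateCmpXchgInstFun callback (same parameter list as the
// lambda in expandAtomicRMWToLibcall below) to control how each cmpxchg is
// emitted.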
1493 
1494 // In order to use one of the sized library calls such as
1495 // __atomic_fetch_add_4, the alignment must be sufficient, the size
1496 // must be one of the potentially-specialized sizes, and the value
1497 // type must actually exist in C on the target (otherwise, the
1498 // function wouldn't actually be defined.)
1499 static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1500  const DataLayout &DL) {
1501  // TODO: "LargestSize" is an approximation for "largest type that
1502  // you can express in C". It seems to be the case that int128 is
1503  // supported on all 64-bit platforms, otherwise only up to 64-bit
1504  // integers are supported. If we get this wrong, then we'll try to
1505  // call a sized libcall that doesn't actually exist. There should
1506  // really be some more reliable way in LLVM of determining integer
1507  // sizes which are valid in the target's C ABI...
1508  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1509  return Alignment >= Size &&
1510  (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1511  Size <= LargestSize;
1512 }
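// For example, a naturally aligned 4-byte operation qualifies for the sized
// calls (__atomic_load_4, __atomic_fetch_add_4, ...), while a 3-byte or
// underaligned operation must use the generic, size_t-prefixed __atomic_*
// entry points described in expandAtomicOpToLibcall below.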
1513 
1514 void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
1515  static const RTLIB::Libcall Libcalls[6] = {
1516  RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1517  RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1518  unsigned Size = getAtomicOpSize(I);
1519 
1520  bool expanded = expandAtomicOpToLibcall(
1521  I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1522  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1523  if (!expanded)
1524  report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
1525 }
1526 
1527 void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
1528  static const RTLIB::Libcall Libcalls[6] = {
1529  RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1530  RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1531  unsigned Size = getAtomicOpSize(I);
1532 
1533  bool expanded = expandAtomicOpToLibcall(
1534  I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1535  nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1536  if (!expanded)
1537  report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
1538 }
1539 
1540 void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1541  static const RTLIB::Libcall Libcalls[6] = {
1542  RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1543  RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1544  RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1545  unsigned Size = getAtomicOpSize(I);
1546 
1547  bool expanded = expandAtomicOpToLibcall(
1548  I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1549  I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1550  Libcalls);
1551  if (!expanded)
1552  report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
1553 }
1554 
1555 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1556  static const RTLIB::Libcall LibcallsXchg[6] = {
1557  RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1558  RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1559  RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1560  static const RTLIB::Libcall LibcallsAdd[6] = {
1561  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1562  RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1563  RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1564  static const RTLIB::Libcall LibcallsSub[6] = {
1565  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1566  RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1567  RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1568  static const RTLIB::Libcall LibcallsAnd[6] = {
1569  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1570  RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1571  RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1572  static const RTLIB::Libcall LibcallsOr[6] = {
1573  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1574  RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1575  RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1576  static const RTLIB::Libcall LibcallsXor[6] = {
1577  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1578  RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1579  RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1580  static const RTLIB::Libcall LibcallsNand[6] = {
1581  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1582  RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1583  RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1584 
1585  switch (Op) {
1586  case AtomicRMWInst::BAD_BINOP:
1587  llvm_unreachable("Should not have BAD_BINOP.");
1588  case AtomicRMWInst::Xchg:
1589  return makeArrayRef(LibcallsXchg);
1590  case AtomicRMWInst::Add:
1591  return makeArrayRef(LibcallsAdd);
1592  case AtomicRMWInst::Sub:
1593  return makeArrayRef(LibcallsSub);
1594  case AtomicRMWInst::And:
1595  return makeArrayRef(LibcallsAnd);
1596  case AtomicRMWInst::Or:
1597  return makeArrayRef(LibcallsOr);
1598  case AtomicRMWInst::Xor:
1599  return makeArrayRef(LibcallsXor);
1600  case AtomicRMWInst::Nand:
1601  return makeArrayRef(LibcallsNand);
1602  case AtomicRMWInst::Max:
1603  case AtomicRMWInst::Min:
1604  case AtomicRMWInst::UMax:
1605  case AtomicRMWInst::UMin:
1606  case AtomicRMWInst::FAdd:
1607  case AtomicRMWInst::FSub:
1608  // No atomic libcalls are available for max/min/umax/umin, nor for the
1609  // floating-point operations (fadd/fsub).
1609  return {};
1610  }
1611  llvm_unreachable("Unexpected AtomicRMW operation.");
1612 }
1613 
1614 void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1615  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1616 
1617  unsigned Size = getAtomicOpSize(I);
1618 
1619  bool Success = false;
1620  if (!Libcalls.empty())
1621  Success = expandAtomicOpToLibcall(
1622  I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1623  nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1624 
1625  // The expansion failed: either there were no libcalls at all for
1626  // the operation (min/max), or there were only size-specialized
1627  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1628  // CAS libcall, via a CAS loop, instead.
1629  if (!Success) {
1630  expandAtomicRMWToCmpXchg(
1631  I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded,
1632  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1633  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
1634  // Create the CAS instruction normally...
1635  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1636  Addr, Loaded, NewVal, Alignment, MemOpOrder,
1637  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1638  Success = Builder.CreateExtractValue(Pair, 1, "success");
1639  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1640 
1641  // ...and then expand the CAS into a libcall.
1642  expandAtomicCASToLibcall(Pair);
1643  });
1644  }
1645 }
1646 
1647 // A helper routine for the above expandAtomic*ToLibcall functions.
1648 //
1649 // 'Libcalls' contains an array of enum values for the particular
1650 // ATOMIC libcalls to be emitted. All of the other arguments besides
1651 // 'I' are extracted from the Instruction subclass by the
1652 // caller. Depending on the particular call, some will be null.
1653 bool AtomicExpand::expandAtomicOpToLibcall(
1654  Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1655  Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1656  AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1657  assert(Libcalls.size() == 6);
1658 
1659  LLVMContext &Ctx = I->getContext();
1660  Module *M = I->getModule();
1661  const DataLayout &DL = M->getDataLayout();
1662  IRBuilder<> Builder(I);
1663  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1664 
1665  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1666  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1667 
1668  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1669 
1670  // TODO: the "order" argument type is "int", not int32. So
1671  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1672  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1673  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1674  Constant *OrderingVal =
1675  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1676  Constant *Ordering2Val = nullptr;
1677  if (CASExpected) {
1678  assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1679  Ordering2Val =
1680  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1681  }
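// toCABI maps LLVM's AtomicOrdering onto the C11/libatomic memory-order
// constants that the __atomic_* functions expect (e.g. seq_cst becomes
// __ATOMIC_SEQ_CST == 5); per the TODO above it is passed as an i32.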
1682  bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1683 
1684  RTLIB::Libcall RTLibType;
1685  if (UseSizedLibcall) {
1686  switch (Size) {
1687  case 1: RTLibType = Libcalls[1]; break;
1688  case 2: RTLibType = Libcalls[2]; break;
1689  case 4: RTLibType = Libcalls[3]; break;
1690  case 8: RTLibType = Libcalls[4]; break;
1691  case 16: RTLibType = Libcalls[5]; break;
1692  }
1693  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1694  RTLibType = Libcalls[0];
1695  } else {
1696  // Can't use sized function, and there's no generic for this
1697  // operation, so give up.
1698  return false;
1699  }
1700 
1701  if (!TLI->getLibcallName(RTLibType)) {
1702  // This target does not implement the requested atomic libcall so give up.
1703  return false;
1704  }
1705 
1706  // Build up the function call. There are two kinds. First, the sized
1707  // variants. These calls are going to be one of the following (with
1708  // N=1,2,4,8,16):
1709  // iN __atomic_load_N(iN *ptr, int ordering)
1710  // void __atomic_store_N(iN *ptr, iN val, int ordering)
1711  // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1712  // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1713  // int success_order, int failure_order)
1714  //
1715  // Note that these functions can be used for non-integer atomic
1716  // operations, the values just need to be bitcast to integers on the
1717  // way in and out.
1718  //
1719  // And, then, the generic variants. They look like the following:
1720  // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1721  // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1722  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1723  // int ordering)
1724  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1725  // void *desired, int success_order,
1726  // int failure_order)
1727  //
1728  // The different signatures are built up depending on the
1729  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1730  // variables.
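// For example, an aligned atomic i32 load becomes
//   i32 __atomic_load_4(i8* %ptr, int order)
// while an underaligned or oddly sized load becomes the generic
//   void __atomic_load(size_t size, i8* %ptr, i8* %ret, int order)
// with the result read back from a temporary alloca afterwards.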
1731 
1732  AllocaInst *AllocaCASExpected = nullptr;
1733  Value *AllocaCASExpected_i8 = nullptr;
1734  AllocaInst *AllocaValue = nullptr;
1735  Value *AllocaValue_i8 = nullptr;
1736  AllocaInst *AllocaResult = nullptr;
1737  Value *AllocaResult_i8 = nullptr;
1738 
1739  Type *ResultTy;
1740  SmallVector<Value *, 6> Args;
1741  AttributeList Attr;
1742 
1743  // 'size' argument.
1744  if (!UseSizedLibcall) {
1745  // Note, getIntPtrType is assumed equivalent to size_t.
1746  Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1747  }
1748 
1749  // 'ptr' argument.
1750  // Note: This assumes all address spaces share a common libfunc
1751  // implementation and that addresses are convertible. For systems without
1752  // that property, we'd need to extend this mechanism to support AS-specific
1753  // families of atomic intrinsics.
1754  auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
1755  Value *PtrVal = Builder.CreateBitCast(PointerOperand,
1756  Type::getInt8PtrTy(Ctx, PtrTypeAS));
1757  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
1758  Args.push_back(PtrVal);
1759 
1760  // 'expected' argument, if present.
1761  if (CASExpected) {
1762  AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1763  AllocaCASExpected->setAlignment(AllocaAlignment);
1764  unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
1765 
1766  AllocaCASExpected_i8 =
1767  Builder.CreateBitCast(AllocaCASExpected,
1768  Type::getInt8PtrTy(Ctx, AllocaAS));
1769  Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
1770  Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1771  Args.push_back(AllocaCASExpected_i8);
1772  }
1773 
1774  // 'val' argument ('desired' for cas), if present.
1775  if (ValueOperand) {
1776  if (UseSizedLibcall) {
1777  Value *IntValue =
1778  Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1779  Args.push_back(IntValue);
1780  } else {
1781  AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1782  AllocaValue->setAlignment(AllocaAlignment);
1783  AllocaValue_i8 =
1784  Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
1785  Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
1786  Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1787  Args.push_back(AllocaValue_i8);
1788  }
1789  }
1790 
1791  // 'ret' argument.
1792  if (!CASExpected && HasResult && !UseSizedLibcall) {
1793  AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1794  AllocaResult->setAlignment(AllocaAlignment);
1795  unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
1796  AllocaResult_i8 =
1797  Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
1798  Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
1799  Args.push_back(AllocaResult_i8);
1800  }
1801 
1802  // 'ordering' ('success_order' for cas) argument.
1803  Args.push_back(OrderingVal);
1804 
1805  // 'failure_order' argument, if present.
1806  if (Ordering2Val)
1807  Args.push_back(Ordering2Val);
1808 
1809  // Now, the return type.
1810  if (CASExpected) {
1811  ResultTy = Type::getInt1Ty(Ctx);
1812  Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
1813  } else if (HasResult && UseSizedLibcall)
1814  ResultTy = SizedIntTy;
1815  else
1816  ResultTy = Type::getVoidTy(Ctx);
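// Note that for the CAS form only the i1 success flag comes back as the call
// result; the loaded value is re-read from the 'expected' alloca after the
// call (see the result extraction below).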
1817 
1818  // Done with setting up arguments and return types, create the call:
1819  SmallVector<Type *, 6> ArgTys;
1820  for (Value *Arg : Args)
1821  ArgTys.push_back(Arg->getType());
1822  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1823  FunctionCallee LibcallFn =
1824  M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1825  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1826  Call->setAttributes(Attr);
1827  Value *Result = Call;
1828 
1829  // And then, extract the results...
1830  if (ValueOperand && !UseSizedLibcall)
1831  Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
1832 
1833  if (CASExpected) {
1834  // The final result from the CAS is {load of 'expected' alloca, bool result
1835  // from call}
1836  Type *FinalResultTy = I->getType();
1837  Value *V = UndefValue::get(FinalResultTy);
1838  Value *ExpectedOut = Builder.CreateAlignedLoad(
1839  CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
1840  Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
1841  V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1842  V = Builder.CreateInsertValue(V, Result, 1);
1843  I->replaceAllUsesWith(V);
1844  } else if (HasResult) {
1845  Value *V;
1846  if (UseSizedLibcall)
1847  V = Builder.CreateBitOrPointerCast(Result, I->getType());
1848  else {
1849  V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
1850  AllocaAlignment);
1851  Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
1852  }
1853  I->replaceAllUsesWith(V);
1854  }
1855  I->eraseFromParent();
1856  return true;
1857 }