1 //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass (at IR level) to replace atomic instructions with
10 // __atomic_* library calls, or target-specific instructions which implement the
11 // same semantics in a way which better fits the target backend. This can
12 // include the use of (intrinsic-based) load-linked/store-conditional loops,
13 // AtomicCmpXchg, or type coercions.
14 //
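//
// As a rough illustration (not tied to any particular target): on a target
// whose widest supported atomic operation is 64 bits, an
//   %old = atomicrmw add ptr %p, i128 %v seq_cst
// is replaced with an __atomic_* library call, while on an LL/SC target an
//   %old = atomicrmw nand ptr %p, i32 %v seq_cst
// is typically expanded into a load-linked/store-conditional loop built from
// target intrinsics.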
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/STLFunctionalExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/Analysis/InstSimplifyFolder.h"
21 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
22 #include "llvm/CodeGen/AtomicExpandUtils.h"
23 #include "llvm/CodeGen/RuntimeLibcalls.h"
24 #include "llvm/CodeGen/TargetLowering.h"
25 #include "llvm/CodeGen/TargetPassConfig.h"
26 #include "llvm/CodeGen/TargetSubtargetInfo.h"
27 #include "llvm/CodeGen/ValueTypes.h"
28 #include "llvm/IR/Attributes.h"
29 #include "llvm/IR/BasicBlock.h"
30 #include "llvm/IR/Constant.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/DataLayout.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/Function.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/InstIterator.h"
37 #include "llvm/IR/Instruction.h"
38 #include "llvm/IR/Instructions.h"
39 #include "llvm/IR/Module.h"
40 #include "llvm/IR/Type.h"
41 #include "llvm/IR/User.h"
42 #include "llvm/IR/Value.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/Pass.h"
45 #include "llvm/Support/AtomicOrdering.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Debug.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/raw_ostream.h"
50 #include "llvm/Target/TargetMachine.h"
51 #include "llvm/Transforms/Utils/LowerAtomic.h"
52 #include <cassert>
53 #include <cstdint>
54 #include <iterator>
55 
56 using namespace llvm;
57 
58 #define DEBUG_TYPE "atomic-expand"
59 
60 namespace {
61 
62 class AtomicExpand : public FunctionPass {
63  const TargetLowering *TLI = nullptr;
64  const DataLayout *DL = nullptr;
65 
66 public:
67  static char ID; // Pass identification, replacement for typeid
68 
69  AtomicExpand() : FunctionPass(ID) {
70  initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
71  }
72 
73  bool runOnFunction(Function &F) override;
74 
75 private:
76  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
77  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
78  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
79  bool tryExpandAtomicLoad(LoadInst *LI);
80  bool expandAtomicLoadToLL(LoadInst *LI);
81  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
82  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
83  bool tryExpandAtomicStore(StoreInst *SI);
84  void expandAtomicStore(StoreInst *SI);
85  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
86  AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
87  Value *
88  insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
89  Align AddrAlign, AtomicOrdering MemOpOrder,
90  function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
91  void expandAtomicOpToLLSC(
92  Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
93  AtomicOrdering MemOpOrder,
94  function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
95  void expandPartwordAtomicRMW(
96  AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
97  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
98  bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
99  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
100  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
101 
102  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
103  static Value *insertRMWCmpXchgLoop(
104  IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
105  AtomicOrdering MemOpOrder, SyncScope::ID SSID,
106  function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
107  CreateCmpXchgInstFun CreateCmpXchg);
108  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
109 
110  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
111  bool isIdempotentRMW(AtomicRMWInst *RMWI);
112  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
113 
114  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
115  Value *PointerOperand, Value *ValueOperand,
116  Value *CASExpected, AtomicOrdering Ordering,
117  AtomicOrdering Ordering2,
118  ArrayRef<RTLIB::Libcall> Libcalls);
119  void expandAtomicLoadToLibcall(LoadInst *LI);
120  void expandAtomicStoreToLibcall(StoreInst *LI);
121  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
122  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
123 
124  friend bool
125  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
126  CreateCmpXchgInstFun CreateCmpXchg);
127 };
128 
129 // IRBuilder to be used for replacement atomic instructions.
130 struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> {
131  // Preserves the DebugLoc from I, and preserves still valid metadata.
132  explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
133  : IRBuilder(I->getContext(), DL) {
134  SetInsertPoint(I);
135  this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
136  }
137 };
138 
139 } // end anonymous namespace
140 
141 char AtomicExpand::ID = 0;
142 
143 char &llvm::AtomicExpandID = AtomicExpand::ID;
144 
145 INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false,
146  false)
147 
148 FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
149 
150 // Helper functions to retrieve the size of atomic instructions.
151 static unsigned getAtomicOpSize(LoadInst *LI) {
152  const DataLayout &DL = LI->getModule()->getDataLayout();
153  return DL.getTypeStoreSize(LI->getType());
154 }
155 
156 static unsigned getAtomicOpSize(StoreInst *SI) {
157  const DataLayout &DL = SI->getModule()->getDataLayout();
158  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
159 }
160 
161 static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
162  const DataLayout &DL = RMWI->getModule()->getDataLayout();
163  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
164 }
165 
166 static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
167  const DataLayout &DL = CASI->getModule()->getDataLayout();
168  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
169 }
170 
171 // Determine if a particular atomic operation has a supported size,
172 // and is of appropriate alignment, to be passed through for target
173 // lowering. (Versus turning into a __atomic libcall)
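// For example, with getMaxAtomicSizeInBitsSupported() == 64, a 16-byte
// cmpxchg fails the size check and an 8-byte access with only 4-byte
// alignment fails the alignment check; both are routed to __atomic libcalls.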
174 template <typename Inst>
175 static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
176  unsigned Size = getAtomicOpSize(I);
177  Align Alignment = I->getAlign();
178  return Alignment >= Size &&
179  Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
180 }
181 
182 bool AtomicExpand::runOnFunction(Function &F) {
183  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
184  if (!TPC)
185  return false;
186 
187  auto &TM = TPC->getTM<TargetMachine>();
188  const auto *Subtarget = TM.getSubtargetImpl(F);
189  if (!Subtarget->enableAtomicExpand())
190  return false;
191  TLI = Subtarget->getTargetLowering();
192  DL = &F.getParent()->getDataLayout();
193 
194  SmallVector<Instruction *, 1> AtomicInsts;
195 
196  // Changing control-flow while iterating through it is a bad idea, so gather a
197  // list of all atomic instructions before we start.
198  for (Instruction &I : instructions(F))
199  if (I.isAtomic() && !isa<FenceInst>(&I))
200  AtomicInsts.push_back(&I);
201 
202  bool MadeChange = false;
203  for (auto *I : AtomicInsts) {
204  auto LI = dyn_cast<LoadInst>(I);
205  auto SI = dyn_cast<StoreInst>(I);
206  auto RMWI = dyn_cast<AtomicRMWInst>(I);
207  auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
208  assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
209 
210  // If the Size/Alignment is not supported, replace with a libcall.
211  if (LI) {
212  if (!atomicSizeSupported(TLI, LI)) {
213  expandAtomicLoadToLibcall(LI);
214  MadeChange = true;
215  continue;
216  }
217  } else if (SI) {
218  if (!atomicSizeSupported(TLI, SI)) {
219  expandAtomicStoreToLibcall(SI);
220  MadeChange = true;
221  continue;
222  }
223  } else if (RMWI) {
224  if (!atomicSizeSupported(TLI, RMWI)) {
225  expandAtomicRMWToLibcall(RMWI);
226  MadeChange = true;
227  continue;
228  }
229  } else if (CASI) {
230  if (!atomicSizeSupported(TLI, CASI)) {
231  expandAtomicCASToLibcall(CASI);
232  MadeChange = true;
233  continue;
234  }
235  }
236 
237  if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
238  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
239  I = LI = convertAtomicLoadToIntegerType(LI);
240  MadeChange = true;
241  } else if (SI &&
242  TLI->shouldCastAtomicStoreInIR(SI) ==
243  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
244  I = SI = convertAtomicStoreToIntegerType(SI);
245  MadeChange = true;
246  } else if (RMWI &&
247  TLI->shouldCastAtomicRMWIInIR(RMWI) ==
248  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
249  I = RMWI = convertAtomicXchgToIntegerType(RMWI);
250  MadeChange = true;
251  } else if (CASI) {
252  // TODO: when we're ready to make the change at the IR level, we can
253  // extend convertCmpXchgToInteger for floating point too.
254  if (CASI->getCompareOperand()->getType()->isPointerTy()) {
255  // TODO: add a TLI hook to control this so that each target can
256  // convert to lowering the original type one at a time.
257  I = CASI = convertCmpXchgToIntegerType(CASI);
258  MadeChange = true;
259  }
260  }
261 
262  if (TLI->shouldInsertFencesForAtomic(I)) {
263  auto FenceOrdering = AtomicOrdering::Monotonic;
264  if (LI && isAcquireOrStronger(LI->getOrdering())) {
265  FenceOrdering = LI->getOrdering();
266  LI->setOrdering(AtomicOrdering::Monotonic);
267  } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
268  FenceOrdering = SI->getOrdering();
269  SI->setOrdering(AtomicOrdering::Monotonic);
270  } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
271  isAcquireOrStronger(RMWI->getOrdering()))) {
272  FenceOrdering = RMWI->getOrdering();
273  RMWI->setOrdering(AtomicOrdering::Monotonic);
274  } else if (CASI &&
275  TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
276  TargetLoweringBase::AtomicExpansionKind::None &&
277  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
278  isAcquireOrStronger(CASI->getSuccessOrdering()) ||
279  isAcquireOrStronger(CASI->getFailureOrdering()))) {
280  // If a compare and swap is lowered to LL/SC, we can do smarter fence
281  // insertion, with a stronger one on the success path than on the
282  // failure path. As a result, fence insertion is directly done by
283  // expandAtomicCmpXchg in that case.
284  FenceOrdering = CASI->getMergedOrdering();
285  CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
286  CASI->setFailureOrdering(AtomicOrdering::Monotonic);
287  }
288 
289  if (FenceOrdering != AtomicOrdering::Monotonic) {
290  MadeChange |= bracketInstWithFences(I, FenceOrdering);
291  }
292  }
293 
294  if (LI)
295  MadeChange |= tryExpandAtomicLoad(LI);
296  else if (SI)
297  MadeChange |= tryExpandAtomicStore(SI);
298  else if (RMWI) {
299  // There are two different ways of expanding RMW instructions:
300  // - into a load if it is idempotent
301  // - into a Cmpxchg/LL-SC loop otherwise
302  // we try them in that order.
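  // For instance, `atomicrmw or ptr %p, i32 0 acquire` does not modify
  // memory, so (if the target agrees) it can be rewritten as a fenced
  // atomic load instead of a full read-modify-write expansion.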
303 
304  if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
305  MadeChange = true;
306  } else {
307  AtomicRMWInst::BinOp Op = RMWI->getOperation();
308  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
309  unsigned ValueSize = getAtomicOpSize(RMWI);
310  if (ValueSize < MinCASSize &&
311  (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
312  Op == AtomicRMWInst::And)) {
313  RMWI = widenPartwordAtomicRMW(RMWI);
314  MadeChange = true;
315  }
316 
317  MadeChange |= tryExpandAtomicRMW(RMWI);
318  }
319  } else if (CASI)
320  MadeChange |= tryExpandAtomicCmpXchg(CASI);
321  }
322  return MadeChange;
323 }
324 
325 bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
326  ReplacementIRBuilder Builder(I, *DL);
327 
328  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
329 
330  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
331  // We have a guard here because not every atomic operation generates a
332  // trailing fence.
333  if (TrailingFence)
334  TrailingFence->moveAfter(I);
335 
336  return (LeadingFence || TrailingFence);
337 }
338 
339 /// Get the iX type with the same bitwidth as T.
340 IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
341  const DataLayout &DL) {
342  EVT VT = TLI->getMemValueType(DL, T);
343  unsigned BitWidth = VT.getStoreSizeInBits();
344  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
345  return IntegerType::get(T->getContext(), BitWidth);
346 }
347 
348 /// Convert an atomic load of a non-integral type to an integer load of the
349 /// equivalent bitwidth. See the function comment on
350 /// convertAtomicStoreToIntegerType for background.
351 LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
352  auto *M = LI->getModule();
353  Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
354 
355  ReplacementIRBuilder Builder(LI, *DL);
356 
357  Value *Addr = LI->getPointerOperand();
358  Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
359  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
360 
361  auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
362  NewLI->setAlignment(LI->getAlign());
363  NewLI->setVolatile(LI->isVolatile());
364  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
365  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
366 
367  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
368  LI->replaceAllUsesWith(NewVal);
369  LI->eraseFromParent();
370  return NewLI;
371 }
372 
373 AtomicRMWInst *
374 AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
375  auto *M = RMWI->getModule();
376  Type *NewTy =
377  getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
378 
379  ReplacementIRBuilder Builder(RMWI, *DL);
380 
381  Value *Addr = RMWI->getPointerOperand();
382  Value *Val = RMWI->getValOperand();
383  Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
384  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
385  Value *NewVal = Val->getType()->isPointerTy()
386  ? Builder.CreatePtrToInt(Val, NewTy)
387  : Builder.CreateBitCast(Val, NewTy);
388 
389  auto *NewRMWI =
390  Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
391  RMWI->getAlign(), RMWI->getOrdering());
392  NewRMWI->setVolatile(RMWI->isVolatile());
393  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
394 
395  Value *NewRVal = RMWI->getType()->isPointerTy()
396  ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
397  : Builder.CreateBitCast(NewRMWI, RMWI->getType());
398  RMWI->replaceAllUsesWith(NewRVal);
399  RMWI->eraseFromParent();
400  return NewRMWI;
401 }
402 
403 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
404  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
405  case TargetLoweringBase::AtomicExpansionKind::None:
406  return false;
407  case TargetLoweringBase::AtomicExpansionKind::LLSC:
408  expandAtomicOpToLLSC(
409  LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
410  LI->getOrdering(),
411  [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
412  return true;
413  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
414  return expandAtomicLoadToLL(LI);
415  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
416  return expandAtomicLoadToCmpXchg(LI);
417  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
418  LI->setAtomic(AtomicOrdering::NotAtomic);
419  return true;
420  default:
421  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
422  }
423 }
424 
425 bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) {
426  switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
427  case TargetLoweringBase::AtomicExpansionKind::None:
428  return false;
429  case TargetLoweringBase::AtomicExpansionKind::Expand:
430  expandAtomicStore(SI);
431  return true;
432  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
433  SI->setAtomic(AtomicOrdering::NotAtomic);
434  return true;
435  default:
436  llvm_unreachable("Unhandled case in tryExpandAtomicStore");
437  }
438 }
439 
440 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
441  ReplacementIRBuilder Builder(LI, *DL);
442 
443  // On some architectures, load-linked instructions are atomic for larger
444  // sizes than normal loads. For example, the only 64-bit load guaranteed
445  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
446  Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
447  LI->getPointerOperand(), LI->getOrdering());
448  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
449 
450  LI->replaceAllUsesWith(Val);
451  LI->eraseFromParent();
452 
453  return true;
454 }
455 
456 bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
457  ReplacementIRBuilder Builder(LI, *DL);
458  AtomicOrdering Order = LI->getOrdering();
459  if (Order == AtomicOrdering::Unordered)
460  Order = AtomicOrdering::Monotonic;
461 
462  Value *Addr = LI->getPointerOperand();
463  Type *Ty = LI->getType();
464  Constant *DummyVal = Constant::getNullValue(Ty);
465 
466  Value *Pair = Builder.CreateAtomicCmpXchg(
467  Addr, DummyVal, DummyVal, LI->getAlign(), Order,
468  AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
469  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
470 
471  LI->replaceAllUsesWith(Loaded);
472  LI->eraseFromParent();
473 
474  return true;
475 }
476 
477 /// Convert an atomic store of a non-integral type to an integer store of the
478 /// equivalent bitwidth. We used to not support floating point or vector
479 /// atomics in the IR at all. The backends learned to deal with the bitcast
480 /// idiom because that was the only way of expressing the notion of an atomic
481 /// float or vector store. The long term plan is to teach each backend to
482 /// instruction select from the original atomic store, but as a migration
483 /// mechanism, we convert back to the old format which the backends understand.
484 /// Each backend will need individual work to recognize the new format.
485 StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
486  ReplacementIRBuilder Builder(SI, *DL);
487  auto *M = SI->getModule();
488  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
489  M->getDataLayout());
490  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
491 
492  Value *Addr = SI->getPointerOperand();
493  Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
494  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
495 
496  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
497  NewSI->setAlignment(SI->getAlign());
498  NewSI->setVolatile(SI->isVolatile());
499  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
500  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
501  SI->eraseFromParent();
502  return NewSI;
503 }
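// As a rough illustration of the conversion above (opaque-pointer form):
//   store atomic float %f, ptr %p release, align 4
// becomes
//   %1 = bitcast float %f to i32
//   store atomic i32 %1, ptr %p release, align 4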
504 
505 void AtomicExpand::expandAtomicStore(StoreInst *SI) {
506  // This function is only called on atomic stores that are too large to be
507  // atomic if implemented as a native store. So we replace them by an
508  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
509  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
510  // It is the responsibility of the target to only signal expansion via
511  // shouldExpandAtomicRMW in cases where this is required and possible.
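  //
  // For example, on a 32-bit ARM target
  //   store atomic i64 %v, ptr %p seq_cst, align 8
  // is first rewritten to
  //   atomicrmw xchg ptr %p, i64 %v seq_cst
  // which tryExpandAtomicRMW then lowers, e.g. to an ldrexd/strexd loop.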
512  ReplacementIRBuilder Builder(SI, *DL);
513  AtomicOrdering Ordering = SI->getOrdering();
514  assert(Ordering != AtomicOrdering::NotAtomic);
515  AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
516  ? AtomicOrdering::Monotonic
517  : Ordering;
518  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
519  AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
520  SI->getAlign(), RMWOrdering);
521  SI->eraseFromParent();
522 
523  // Now we have an appropriate swap instruction, lower it as usual.
524  tryExpandAtomicRMW(AI);
525 }
526 
527 static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
528  Value *Loaded, Value *NewVal, Align AddrAlign,
529  AtomicOrdering MemOpOrder, SyncScope::ID SSID,
530  Value *&Success, Value *&NewLoaded) {
531  Type *OrigTy = NewVal->getType();
532 
533  // This code can go away when cmpxchg supports FP types.
534  assert(!OrigTy->isPointerTy());
535  bool NeedBitcast = OrigTy->isFloatingPointTy();
536  if (NeedBitcast) {
537  IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
538  unsigned AS = Addr->getType()->getPointerAddressSpace();
539  Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
540  NewVal = Builder.CreateBitCast(NewVal, IntTy);
541  Loaded = Builder.CreateBitCast(Loaded, IntTy);
542  }
543 
544  Value *Pair = Builder.CreateAtomicCmpXchg(
545  Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
546  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
547  Success = Builder.CreateExtractValue(Pair, 1, "success");
548  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
549 
550  if (NeedBitcast)
551  NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
552 }
553 
554 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
555  LLVMContext &Ctx = AI->getModule()->getContext();
556  TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
557  switch (Kind) {
558  case TargetLoweringBase::AtomicExpansionKind::None:
559  return false;
560  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
561  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
562  unsigned ValueSize = getAtomicOpSize(AI);
563  if (ValueSize < MinCASSize) {
564  expandPartwordAtomicRMW(AI,
565  TargetLoweringBase::AtomicExpansionKind::LLSC);
566  } else {
567  auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
568  return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
569  AI->getValOperand());
570  };
571  expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
572  AI->getAlign(), AI->getOrdering(), PerformOp);
573  }
574  return true;
575  }
576  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
577  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
578  unsigned ValueSize = getAtomicOpSize(AI);
579  if (ValueSize < MinCASSize) {
580  expandPartwordAtomicRMW(AI,
581  TargetLoweringBase::AtomicExpansionKind::CmpXChg);
582  } else {
583  SmallVector<StringRef> SSNs;
584  Ctx.getSyncScopeNames(SSNs);
585  auto MemScope = SSNs[AI->getSyncScopeID()].empty()
586  ? "system"
587  : SSNs[AI->getSyncScopeID()];
588  OptimizationRemarkEmitter ORE(AI->getFunction());
589  ORE.emit([&]() {
590  return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
591  << "A compare and swap loop was generated for an atomic "
592  << AI->getOperationName(AI->getOperation()) << " operation at "
593  << MemScope << " memory scope";
594  });
595  expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
596  }
597  return true;
598  }
599  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
600  expandAtomicRMWToMaskedIntrinsic(AI);
601  return true;
602  }
603  case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
604  TLI->emitBitTestAtomicRMWIntrinsic(AI);
605  return true;
606  }
607  case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
608  TLI->emitCmpArithAtomicRMWIntrinsic(AI);
609  return true;
610  }
611  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
612  return lowerAtomicRMWInst(AI);
613  case TargetLoweringBase::AtomicExpansionKind::Expand:
614  TLI->emitExpandAtomicRMW(AI);
615  return true;
616  default:
617  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
618  }
619 }
620 
621 namespace {
622 
623 struct PartwordMaskValues {
624  // These three fields are guaranteed to be set by createMaskInstrs.
625  Type *WordType = nullptr;
626  Type *ValueType = nullptr;
627  Type *IntValueType = nullptr;
628  Value *AlignedAddr = nullptr;
629  Align AlignedAddrAlignment;
630  // The remaining fields can be null.
631  Value *ShiftAmt = nullptr;
632  Value *Mask = nullptr;
633  Value *Inv_Mask = nullptr;
634 };
635 
636 LLVM_ATTRIBUTE_UNUSED
637 raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
638  auto PrintObj = [&O](auto *V) {
639  if (V)
640  O << *V;
641  else
642  O << "nullptr";
643  O << '\n';
644  };
645  O << "PartwordMaskValues {\n";
646  O << " WordType: ";
647  PrintObj(PMV.WordType);
648  O << " ValueType: ";
649  PrintObj(PMV.ValueType);
650  O << " AlignedAddr: ";
651  PrintObj(PMV.AlignedAddr);
652  O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
653  O << " ShiftAmt: ";
654  PrintObj(PMV.ShiftAmt);
655  O << " Mask: ";
656  PrintObj(PMV.Mask);
657  O << " Inv_Mask: ";
658  PrintObj(PMV.Inv_Mask);
659  O << "}\n";
660  return O;
661 }
662 
663 } // end anonymous namespace
664 
665 /// This is a helper function which builds instructions to provide
666 /// values necessary for partword atomic operations. It takes an
667 /// incoming address, Addr, and ValueType, and constructs the address,
668 /// shift-amounts and masks needed to work with a larger value of size
669 /// WordSize.
670 ///
671 /// AlignedAddr: Addr rounded down to a multiple of WordSize
672 ///
673 /// ShiftAmt: Number of bits to right-shift a WordSize value loaded
674 /// from AlignAddr for it to have the same value as if
675 /// ValueType was loaded from Addr.
676 ///
677 /// Mask: Value to mask with the value loaded from AlignAddr to
678 /// include only the part that would've been loaded from Addr.
679 ///
680 /// Inv_Mask: The inverse of Mask.
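///
/// For example, on a little-endian target with MinWordSize = 4 and an i8
/// ValueType whose address has low bits 0b11, this produces
/// AlignedAddr = Addr & ~3, ShiftAmt = 24, Mask = 0xff000000 and
/// Inv_Mask = 0x00ffffff.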
681 static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
682  Instruction *I, Type *ValueType,
683  Value *Addr, Align AddrAlign,
684  unsigned MinWordSize) {
685  PartwordMaskValues PMV;
686 
687  Module *M = I->getModule();
688  LLVMContext &Ctx = M->getContext();
689  const DataLayout &DL = M->getDataLayout();
690  unsigned ValueSize = DL.getTypeStoreSize(ValueType);
691 
692  PMV.ValueType = PMV.IntValueType = ValueType;
693  if (PMV.ValueType->isFloatingPointTy())
694  PMV.IntValueType =
695  Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
696 
697  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
698  : ValueType;
699  if (PMV.ValueType == PMV.WordType) {
700  PMV.AlignedAddr = Addr;
701  PMV.AlignedAddrAlignment = AddrAlign;
702  PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
703  PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
704  return PMV;
705  }
706 
707  PMV.AlignedAddrAlignment = Align(MinWordSize);
708 
709  assert(ValueSize < MinWordSize);
710 
711  PointerType *PtrTy = cast<PointerType>(Addr->getType());
712  Type *WordPtrType = PMV.WordType->getPointerTo(PtrTy->getAddressSpace());
713  IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
714  Value *PtrLSB;
715 
716  if (AddrAlign < MinWordSize) {
717  PMV.AlignedAddr = Builder.CreateIntrinsic(
718  Intrinsic::ptrmask, {PtrTy, IntTy},
719  {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
720  "AlignedAddr");
721 
722  Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
723  PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
724  } else {
725  // If the alignment is high enough, the low bits of the address are known to be 0.
726  PMV.AlignedAddr = Addr;
727  PtrLSB = ConstantInt::getNullValue(IntTy);
728  }
729 
730  if (DL.isLittleEndian()) {
731  // turn bytes into bits
732  PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
733  } else {
734  // turn bytes into bits, and count from the other side.
735  PMV.ShiftAmt = Builder.CreateShl(
736  Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
737  }
738 
739  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
740  PMV.Mask = Builder.CreateShl(
741  ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
742  "Mask");
743 
744  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
745 
746  // Cast for typed pointers.
747  PMV.AlignedAddr =
748  Builder.CreateBitCast(PMV.AlignedAddr, WordPtrType, "AlignedAddr");
749 
750  return PMV;
751 }
752 
753 static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
754  const PartwordMaskValues &PMV) {
755  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
756  if (PMV.WordType == PMV.ValueType)
757  return WideWord;
758 
759  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
760  Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
761  return Builder.CreateBitCast(Trunc, PMV.ValueType);
762 }
763 
764 static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
765  Value *Updated, const PartwordMaskValues &PMV) {
766  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
767  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
768  if (PMV.WordType == PMV.ValueType)
769  return Updated;
770 
771  Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
772 
773  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
774  Value *Shift =
775  Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
776  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
777  Value *Or = Builder.CreateOr(And, Shift, "inserted");
778  return Or;
779 }
780 
781 /// Emit IR to implement a masked version of a given atomicrmw
782 /// operation. (That is, only the bits under the Mask should be
783 /// affected by the operation)
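///
/// For example, for an i8 `add` held in the lowest byte of an i32 word
/// (ShiftAmt = 0, Mask = 0xff), the result is roughly
///   (Loaded & 0xffffff00) | ((Loaded + Shifted_Inc) & 0x000000ff)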
784 static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
785  IRBuilderBase &Builder, Value *Loaded,
786  Value *Shifted_Inc, Value *Inc,
787  const PartwordMaskValues &PMV) {
788  // TODO: update to use
789  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
790  // to merge bits from two values without requiring PMV.Inv_Mask.
791  switch (Op) {
792  case AtomicRMWInst::Xchg: {
793  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
794  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
795  return FinalVal;
796  }
797  case AtomicRMWInst::Or:
798  case AtomicRMWInst::Xor:
799  case AtomicRMWInst::And:
800  llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
801  case AtomicRMWInst::Add:
802  case AtomicRMWInst::Sub:
803  case AtomicRMWInst::Nand: {
804  // The other arithmetic ops need to be masked into place.
805  Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
806  Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
807  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
808  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
809  return FinalVal;
810  }
811  case AtomicRMWInst::Max:
812  case AtomicRMWInst::Min:
813  case AtomicRMWInst::UMax:
814  case AtomicRMWInst::UMin:
815  case AtomicRMWInst::FAdd:
816  case AtomicRMWInst::FSub:
817  case AtomicRMWInst::FMin:
818  case AtomicRMWInst::FMax: {
819  // Finally, other ops will operate on the full value, so truncate down to
820  // the original size, and expand out again after doing the
821  // operation. Bitcasts will be inserted for FP values.
822  Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
823  Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
824  Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
825  return FinalVal;
826  }
827  default:
828  llvm_unreachable("Unknown atomic op");
829  }
830 }
831 
832 /// Expand a sub-word atomicrmw operation into an appropriate
833 /// word-sized operation.
834 ///
835 /// It will create an LL/SC or cmpxchg loop, as appropriate, the same
836 /// way as a typical atomicrmw expansion. The only difference here is
837 /// that the operation inside of the loop may operate upon only a
838 /// part of the value.
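///
/// For example, an `atomicrmw umax ptr %p, i16 %v` on a target with a 32-bit
/// minimum cmpxchg width is expanded into a loop over the containing i32
/// word in which only the addressed 16 bits are compared and updated.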
839 void AtomicExpand::expandPartwordAtomicRMW(
840  AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
841  AtomicOrdering MemOpOrder = AI->getOrdering();
842  SyncScope::ID SSID = AI->getSyncScopeID();
843 
844  ReplacementIRBuilder Builder(AI, *DL);
845 
846  PartwordMaskValues PMV =
847  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
848  AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
849 
850  Value *ValOperand_Shifted = nullptr;
851  if (AI->getOperation() == AtomicRMWInst::Xchg ||
852  AI->getOperation() == AtomicRMWInst::Add ||
853  AI->getOperation() == AtomicRMWInst::Sub ||
854  AI->getOperation() == AtomicRMWInst::Nand) {
855  ValOperand_Shifted =
856  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
857  PMV.ShiftAmt, "ValOperand_Shifted");
858  }
859 
860  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
861  return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
862  ValOperand_Shifted, AI->getValOperand(), PMV);
863  };
864 
865  Value *OldResult;
866  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
867  OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
868  PMV.AlignedAddrAlignment, MemOpOrder, SSID,
869  PerformPartwordOp, createCmpXchgInstFun);
870  } else {
871  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
872  OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
873  PMV.AlignedAddrAlignment, MemOpOrder,
874  PerformPartwordOp);
875  }
876 
877  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
878  AI->replaceAllUsesWith(FinalOldResult);
879  AI->eraseFromParent();
880 }
881 
882 // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
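// For example (little-endian, 32-bit minimum width, value in the low byte):
//   atomicrmw and ptr %p, i8 %v  ->  atomicrmw and ptr %p, i32 (zext %v | 0xffffff00)
//   atomicrmw or  ptr %p, i8 %v  ->  atomicrmw or  ptr %p, i32 (zext %v)
// so the bytes outside the original i8 are left unchanged.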
883 AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
884  ReplacementIRBuilder Builder(AI, *DL);
885  AtomicRMWInst::BinOp Op = AI->getOperation();
886 
887  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
888  Op == AtomicRMWInst::And) &&
889  "Unable to widen operation");
890 
891  PartwordMaskValues PMV =
892  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
893  AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
894 
895  Value *ValOperand_Shifted =
896  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
897  PMV.ShiftAmt, "ValOperand_Shifted");
898 
899  Value *NewOperand;
900 
901  if (Op == AtomicRMWInst::And)
902  NewOperand =
903  Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
904  else
905  NewOperand = ValOperand_Shifted;
906 
907  AtomicRMWInst *NewAI =
908  Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand,
909  PMV.AlignedAddrAlignment, AI->getOrdering());
910 
911  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
912  AI->replaceAllUsesWith(FinalOldResult);
913  AI->eraseFromParent();
914  return NewAI;
915 }
916 
917 bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
918  // The basic idea here is that we're expanding a cmpxchg of a
919  // smaller memory size up to a word-sized cmpxchg. To do this, we
920  // need to add a retry-loop for strong cmpxchg, so that
921  // modifications to other parts of the word don't cause a spurious
922  // failure.
923 
924  // This generates code like the following:
925  // [[Setup mask values PMV.*]]
926  // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
927  // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
928  // %InitLoaded = load i32* %addr
929  // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
930  // br partword.cmpxchg.loop
931  // partword.cmpxchg.loop:
932  // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
933  // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
934  // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
935  // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
936  // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
937  // i32 %FullWord_NewVal success_ordering failure_ordering
938  // %OldVal = extractvalue { i32, i1 } %NewCI, 0
939  // %Success = extractvalue { i32, i1 } %NewCI, 1
940  // br i1 %Success, label %partword.cmpxchg.end,
941  // label %partword.cmpxchg.failure
942  // partword.cmpxchg.failure:
943  // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
944  // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
945  // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
946  // label %partword.cmpxchg.end
947  // partword.cmpxchg.end:
948  // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
949  // %FinalOldVal = trunc i32 %tmp1 to i8
950  // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
951  // %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
952 
953  Value *Addr = CI->getPointerOperand();
954  Value *Cmp = CI->getCompareOperand();
955  Value *NewVal = CI->getNewValOperand();
956 
957  BasicBlock *BB = CI->getParent();
958  Function *F = BB->getParent();
959  ReplacementIRBuilder Builder(CI, *DL);
960  LLVMContext &Ctx = Builder.getContext();
961 
962  BasicBlock *EndBB =
963  BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
964  auto FailureBB =
965  BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
966  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
967 
968  // The split call above "helpfully" added a branch at the end of BB
969  // (to the wrong place).
970  std::prev(BB->end())->eraseFromParent();
971  Builder.SetInsertPoint(BB);
972 
973  PartwordMaskValues PMV =
974  createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
975  CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
976 
977  // Shift the incoming values over, into the right location in the word.
978  Value *NewVal_Shifted =
979  Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
980  Value *Cmp_Shifted =
981  Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
982 
983  // Load the entire current word, and mask into place the expected and new
984  // values
985  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
986  InitLoaded->setVolatile(CI->isVolatile());
987  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
988  Builder.CreateBr(LoopBB);
989 
990  // partword.cmpxchg.loop:
991  Builder.SetInsertPoint(LoopBB);
992  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
993  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
994 
995  // Mask/Or the expected and new values into place in the loaded word.
996  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
997  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
998  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
999  PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1000  CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
1001  NewCI->setVolatile(CI->isVolatile());
1002  // When we're building a strong cmpxchg, we need a loop, so you
1003  // might think we could use a weak cmpxchg inside. But, using strong
1004  // allows the below comparison for ShouldContinue, and we're
1005  // expecting the underlying cmpxchg to be a machine instruction,
1006  // which is strong anyways.
1007  NewCI->setWeak(CI->isWeak());
1008 
1009  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1010  Value *Success = Builder.CreateExtractValue(NewCI, 1);
1011 
1012  if (CI->isWeak())
1013  Builder.CreateBr(EndBB);
1014  else
1015  Builder.CreateCondBr(Success, EndBB, FailureBB);
1016 
1017  // partword.cmpxchg.failure:
1018  Builder.SetInsertPoint(FailureBB);
1019  // Upon failure, check whether the masked-out part of the loaded value has
1020  // been modified. If it has, retry with the updated value; if it hasn't, the
1021  // masked-in part must have mismatched, so the cmpxchg has genuinely failed.
1022  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1023  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1024  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1025 
1026  // Add the second value to the phi from above
1027  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1028 
1029  // partword.cmpxchg.end:
1030  Builder.SetInsertPoint(CI);
1031 
1032  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1033  Value *Res = PoisonValue::get(CI->getType());
1034  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1035  Res = Builder.CreateInsertValue(Res, Success, 1);
1036 
1037  CI->replaceAllUsesWith(Res);
1038  CI->eraseFromParent();
1039  return true;
1040 }
1041 
1042 void AtomicExpand::expandAtomicOpToLLSC(
1043  Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1044  AtomicOrdering MemOpOrder,
1045  function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1046  ReplacementIRBuilder Builder(I, *DL);
1047  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1048  MemOpOrder, PerformOp);
1049 
1050  I->replaceAllUsesWith(Loaded);
1051  I->eraseFromParent();
1052 }
1053 
1054 void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1055  ReplacementIRBuilder Builder(AI, *DL);
1056 
1057  PartwordMaskValues PMV =
1058  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1059  AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1060 
1061  // The value operand must be sign-extended for signed min/max so that the
1062  // target's signed comparison instructions can be used. Otherwise, just
1063  // zero-ext.
1064  Instruction::CastOps CastOp = Instruction::ZExt;
1065  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1066  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1067  CastOp = Instruction::SExt;
1068 
1069  Value *ValOperand_Shifted = Builder.CreateShl(
1070  Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1071  PMV.ShiftAmt, "ValOperand_Shifted");
1072  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1073  Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1074  AI->getOrdering());
1075  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1076  AI->replaceAllUsesWith(FinalOldResult);
1077  AI->eraseFromParent();
1078 }
1079 
1080 void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
1081  ReplacementIRBuilder Builder(CI, *DL);
1082 
1083  PartwordMaskValues PMV = createMaskInstrs(
1084  Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1085  CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1086 
1087  Value *CmpVal_Shifted = Builder.CreateShl(
1088  Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1089  "CmpVal_Shifted");
1090  Value *NewVal_Shifted = Builder.CreateShl(
1091  Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1092  "NewVal_Shifted");
1093  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1094  Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1095  CI->getMergedOrdering());
1096  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1097  Value *Res = PoisonValue::get(CI->getType());
1098  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1099  Value *Success = Builder.CreateICmpEQ(
1100  CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1101  Res = Builder.CreateInsertValue(Res, Success, 1);
1102 
1103  CI->replaceAllUsesWith(Res);
1104  CI->eraseFromParent();
1105 }
1106 
1107 Value *AtomicExpand::insertRMWLLSCLoop(
1108  IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1109  AtomicOrdering MemOpOrder,
1110  function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1111  LLVMContext &Ctx = Builder.getContext();
1112  BasicBlock *BB = Builder.GetInsertBlock();
1113  Function *F = BB->getParent();
1114 
1115  assert(AddrAlign >=
1116  F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1117  "Expected at least natural alignment at this point.");
1118 
1119  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1120  //
1121  // The standard expansion we produce is:
1122  // [...]
1123  // atomicrmw.start:
1124  // %loaded = @load.linked(%addr)
1125  // %new = some_op iN %loaded, %incr
1126  // %stored = @store_conditional(%new, %addr)
1127  // %try_again = icmp i32 ne %stored, 0
1128  // br i1 %try_again, label %loop, label %atomicrmw.end
1129  // atomicrmw.end:
1130  // [...]
1131  BasicBlock *ExitBB =
1132  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1133  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1134 
1135  // The split call above "helpfully" added a branch at the end of BB (to the
1136  // wrong place).
1137  std::prev(BB->end())->eraseFromParent();
1138  Builder.SetInsertPoint(BB);
1139  Builder.CreateBr(LoopBB);
1140 
1141  // Start the main loop block now that we've taken care of the preliminaries.
1142  Builder.SetInsertPoint(LoopBB);
1143  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1144 
1145  Value *NewVal = PerformOp(Builder, Loaded);
1146 
1147  Value *StoreSuccess =
1148  TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1149  Value *TryAgain = Builder.CreateICmpNE(
1150  StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1151  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1152 
1153  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1154  return Loaded;
1155 }
1156 
1157 /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1158 /// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1159 /// IR. As a migration step, we convert back to what used to be the standard
1160 /// way to represent a pointer cmpxchg so that we can update backends one by
1161 /// one.
1162 AtomicCmpXchgInst *
1163 AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1164  auto *M = CI->getModule();
1165  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1166  M->getDataLayout());
1167 
1168  ReplacementIRBuilder Builder(CI, *DL);
1169 
1170  Value *Addr = CI->getPointerOperand();
1171  Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
1172  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
1173 
1174  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1175  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1176 
1177  auto *NewCI = Builder.CreateAtomicCmpXchg(
1178  NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1179  CI->getFailureOrdering(), CI->getSyncScopeID());
1180  NewCI->setVolatile(CI->isVolatile());
1181  NewCI->setWeak(CI->isWeak());
1182  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1183 
1184  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1185  Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1186 
1187  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1188 
1189  Value *Res = PoisonValue::get(CI->getType());
1190  Res = Builder.CreateInsertValue(Res, OldVal, 0);
1191  Res = Builder.CreateInsertValue(Res, Succ, 1);
1192 
1193  CI->replaceAllUsesWith(Res);
1194  CI->eraseFromParent();
1195  return NewCI;
1196 }
1197 
1198 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1199  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1200  AtomicOrdering FailureOrder = CI->getFailureOrdering();
1201  Value *Addr = CI->getPointerOperand();
1202  BasicBlock *BB = CI->getParent();
1203  Function *F = BB->getParent();
1204  LLVMContext &Ctx = F->getContext();
1205  // If shouldInsertFencesForAtomic() returns true, then the target does not
1206  // want to deal with memory orders, and emitLeading/TrailingFence should take
1207  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1208  // should preserve the ordering.
1209  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1210  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1211  ? AtomicOrdering::Monotonic
1212  : CI->getMergedOrdering();
1213 
1214  // In implementations which use a barrier to achieve release semantics, we can
1215  // delay emitting this barrier until we know a store is actually going to be
1216  // attempted. The cost of this delay is that we need 2 copies of the block
1217  // emitting the load-linked, affecting code size.
1218  //
1219  // Ideally, this logic would be unconditional except for the minsize check
1220  // since in other cases the extra blocks naturally collapse down to the
1221  // minimal loop. Unfortunately, this puts too much stress on later
1222  // optimisations so we avoid emitting the extra logic in those cases too.
1223  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1224  SuccessOrder != AtomicOrdering::Monotonic &&
1225  SuccessOrder != AtomicOrdering::Acquire &&
1226  !F->hasMinSize();
1227 
1228  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1229  // do it even on minsize.
1230  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1231 
1232  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1233  //
1234  // The full expansion we produce is:
1235  // [...]
1236  // %aligned.addr = ...
1237  // cmpxchg.start:
1238  // %unreleasedload = @load.linked(%aligned.addr)
1239  // %unreleasedload.extract = extract value from %unreleasedload
1240  // %should_store = icmp eq %unreleasedload.extract, %desired
1241  // br i1 %should_store, label %cmpxchg.releasingstore,
1242  // label %cmpxchg.nostore
1243  // cmpxchg.releasingstore:
1244  // fence?
1245  // br label cmpxchg.trystore
1246  // cmpxchg.trystore:
1247  // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1248  // [%releasedload, %cmpxchg.releasedload]
1249  // %updated.new = insert %new into %loaded.trystore
1250  // %stored = @store_conditional(%updated.new, %aligned.addr)
1251  // %success = icmp eq i32 %stored, 0
1252  // br i1 %success, label %cmpxchg.success,
1253  // label %cmpxchg.releasedload/%cmpxchg.failure
1254  // cmpxchg.releasedload:
1255  // %releasedload = @load.linked(%aligned.addr)
1256  // %releasedload.extract = extract value from %releasedload
1257  // %should_store = icmp eq %releasedload.extract, %desired
1258  // br i1 %should_store, label %cmpxchg.trystore,
1259  // label %cmpxchg.failure
1260  // cmpxchg.success:
1261  // fence?
1262  // br label %cmpxchg.end
1263  // cmpxchg.nostore:
1264  // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1265  // [%releasedload,
1266  // %cmpxchg.releasedload/%cmpxchg.trystore]
1267  // @load_linked_fail_balance()?
1268  // br label %cmpxchg.failure
1269  // cmpxchg.failure:
1270  // fence?
1271  // br label %cmpxchg.end
1272  // cmpxchg.end:
1273  // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1274  // [%loaded.trystore, %cmpxchg.trystore]
1275  // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1276  // %loaded = extract value from %loaded.exit
1277  // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1278  // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1279  // [...]
1280  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1281  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1282  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1283  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1284  auto ReleasedLoadBB =
1285  BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1286  auto TryStoreBB =
1287  BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1288  auto ReleasingStoreBB =
1289  BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1290  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1291 
1292  ReplacementIRBuilder Builder(CI, *DL);
1293 
1294  // The split call above "helpfully" added a branch at the end of BB (to the
1295  // wrong place), but we might want a fence too. It's easiest to just remove
1296  // the branch entirely.
1297  std::prev(BB->end())->eraseFromParent();
1298  Builder.SetInsertPoint(BB);
1299  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1300  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1301 
1302  PartwordMaskValues PMV =
1303  createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1304  CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1305  Builder.CreateBr(StartBB);
1306 
1307  // Start the main loop block now that we've taken care of the preliminaries.
1308  Builder.SetInsertPoint(StartBB);
1309  Value *UnreleasedLoad =
1310  TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1311  Value *UnreleasedLoadExtract =
1312  extractMaskedValue(Builder, UnreleasedLoad, PMV);
1313  Value *ShouldStore = Builder.CreateICmpEQ(
1314  UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1315 
1316  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1317  // jump straight past that fence instruction (if it exists).
1318  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1319 
1320  Builder.SetInsertPoint(ReleasingStoreBB);
1321  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1322  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1323  Builder.CreateBr(TryStoreBB);
1324 
1325  Builder.SetInsertPoint(TryStoreBB);
1326  PHINode *LoadedTryStore =
1327  Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1328  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1329  Value *NewValueInsert =
1330  insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1331  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1332  PMV.AlignedAddr, MemOpOrder);
1333  StoreSuccess = Builder.CreateICmpEQ(
1334  StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1335  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1336  Builder.CreateCondBr(StoreSuccess, SuccessBB,
1337  CI->isWeak() ? FailureBB : RetryBB);
1338 
1339  Builder.SetInsertPoint(ReleasedLoadBB);
1340  Value *SecondLoad;
1341  if (HasReleasedLoadBB) {
1342  SecondLoad =
1343  TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1344  Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1345  ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1346  CI->getCompareOperand(), "should_store");
1347 
1348  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1349  // jump straight past that fence instruction (if it exists).
1350  Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1351  // Update PHI node in TryStoreBB.
1352  LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1353  } else
1354  Builder.CreateUnreachable();
1355 
1356  // Make sure later instructions don't get reordered with a fence if
1357  // necessary.
1358  Builder.SetInsertPoint(SuccessBB);
1359  if (ShouldInsertFencesForAtomic)
1360  TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1361  Builder.CreateBr(ExitBB);
1362 
1363  Builder.SetInsertPoint(NoStoreBB);
1364  PHINode *LoadedNoStore =
1365  Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1366  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1367  if (HasReleasedLoadBB)
1368  LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1369 
1370  // In the failing case, where we don't execute the store-conditional, the
1371  // target might want to balance out the load-linked with a dedicated
1372  // instruction (e.g., on ARM, clearing the exclusive monitor).
1373  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1374  Builder.CreateBr(FailureBB);
1375 
1376  Builder.SetInsertPoint(FailureBB);
1377  PHINode *LoadedFailure =
1378  Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1379  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1380  if (CI->isWeak())
1381  LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1382  if (ShouldInsertFencesForAtomic)
1383  TLI->emitTrailingFence(Builder, CI, FailureOrder);
1384  Builder.CreateBr(ExitBB);
1385 
1386  // Finally, we have control-flow based knowledge of whether the cmpxchg
1387  // succeeded or not. We expose this to later passes by converting any
1388  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1389  // PHI.
1390  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1391  PHINode *LoadedExit =
1392  Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1393  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1394  LoadedExit->addIncoming(LoadedFailure, FailureBB);
1395  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1396  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1397  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1398 
1399  // This is the "exit value" from the cmpxchg expansion. It may be of
1400  // a type wider than the one in the cmpxchg instruction.
1401  Value *LoadedFull = LoadedExit;
1402 
1403  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1404  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1405 
1406  // Look for any users of the cmpxchg that are just comparing the loaded value
1407  // against the desired one, and replace them with the CFG-derived version.
1408  SmallVector<ExtractValueInst *, 2> PrunedInsts;
1409  for (auto *User : CI->users()) {
1410  ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1411  if (!EV)
1412  continue;
1413 
1414  assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1415  "weird extraction from { iN, i1 }");
1416 
1417  if (EV->getIndices()[0] == 0)
1418  EV->replaceAllUsesWith(Loaded);
1419  else
1420  EV->replaceAllUsesWith(Success);
1421 
1422  PrunedInsts.push_back(EV);
1423  }
1424 
1425  // We can remove the instructions now we're no longer iterating through them.
1426  for (auto *EV : PrunedInsts)
1427  EV->eraseFromParent();
1428 
1429  if (!CI->use_empty()) {
1430  // Some use of the full struct return that we don't understand has happened,
1431  // so we've got to reconstruct it properly.
1432  Value *Res;
1433  Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1434  Res = Builder.CreateInsertValue(Res, Success, 1);
1435 
1436  CI->replaceAllUsesWith(Res);
1437  }
1438 
1439  CI->eraseFromParent();
1440  return true;
1441 }
1442 
1443 bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
1444  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1445  if (!C)
1446  return false;
1447 
1448  AtomicRMWInst::BinOp Op = RMWI->getOperation();
1449  switch (Op) {
1450  case AtomicRMWInst::Add:
1451  case AtomicRMWInst::Sub:
1452  case AtomicRMWInst::Or:
1453  case AtomicRMWInst::Xor:
1454  return C->isZero();
1455  case AtomicRMWInst::And:
1456  return C->isMinusOne();
1457  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1458  default:
1459  return false;
1460  }
1461 }
1462 
1463 bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1464  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1465  tryExpandAtomicLoad(ResultingLoad);
1466  return true;
1467  }
1468  return false;
1469 }
1470 
1471 Value *AtomicExpand::insertRMWCmpXchgLoop(
1472  IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1473  AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1474  function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1475  CreateCmpXchgInstFun CreateCmpXchg) {
1476  LLVMContext &Ctx = Builder.getContext();
1477  BasicBlock *BB = Builder.GetInsertBlock();
1478  Function *F = BB->getParent();
1479 
1480  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1481  //
1482  // The standard expansion we produce is:
1483  // [...]
1484  // %init_loaded = load atomic iN* %addr
1485  // br label %loop
1486  // loop:
1487  // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1488  // %new = some_op iN %loaded, %incr
1489  // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1490  // %new_loaded = extractvalue { iN, i1 } %pair, 0
1491  // %success = extractvalue { iN, i1 } %pair, 1
1492  // br i1 %success, label %atomicrmw.end, label %loop
1493  // atomicrmw.end:
1494  // [...]
1495  BasicBlock *ExitBB =
1496  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1497  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1498 
1499  // The split call above "helpfully" added a branch at the end of BB (to the
1500  // wrong place), but we want a load. It's easiest to just remove
1501  // the branch entirely.
1502  std::prev(BB->end())->eraseFromParent();
1503  Builder.SetInsertPoint(BB);
1504  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1505  Builder.CreateBr(LoopBB);
1506 
1507  // Start the main loop block now that we've taken care of the preliminaries.
1508  Builder.SetInsertPoint(LoopBB);
1509  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1510  Loaded->addIncoming(InitLoaded, BB);
1511 
1512  Value *NewVal = PerformOp(Builder, Loaded);
1513 
1514  Value *NewLoaded = nullptr;
1515  Value *Success = nullptr;
1516 
1517  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1518  MemOpOrder == AtomicOrdering::Unordered
 1519  ? AtomicOrdering::Monotonic
 1520  : MemOpOrder,
1521  SSID, Success, NewLoaded);
1522  assert(Success && NewLoaded);
1523 
1524  Loaded->addIncoming(NewLoaded, LoopBB);
1525 
1526  Builder.CreateCondBr(Success, ExitBB, LoopBB);
1527 
1528  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1529  return NewLoaded;
1530 }
1531 
1532 bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1533  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1534  unsigned ValueSize = getAtomicOpSize(CI);
1535 
1536  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1537  default:
1538  llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
 1539  case TargetLoweringBase::AtomicExpansionKind::None:
 1540  if (ValueSize < MinCASSize)
1541  return expandPartwordCmpXchg(CI);
1542  return false;
 1543  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
 1544  return expandAtomicCmpXchg(CI);
1545  }
 1546  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
 1547  expandAtomicCmpXchgToMaskedIntrinsic(CI);
1548  return true;
 1549  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
 1550  return lowerAtomicCmpXchgInst(CI);
1551  }
1552 }
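// Worked example (illustrative): under the None expansion kind, a cmpxchg
// narrower than the target's minimum width (say an i8 cmpxchg where
// getMinCmpXchgSizeInBits() is 32, so MinCASSize == 4 and ValueSize == 1)
// is routed through expandPartwordCmpXchg and emulated on the containing
// aligned word, while full-width operations are left untouched.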
1553 
1554 // Note: This function is exposed externally by AtomicExpandUtils.h
 1555 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
 1556  CreateCmpXchgInstFun CreateCmpXchg) {
1557  ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
1558  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
1559  Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1560  AI->getOrdering(), AI->getSyncScopeID(),
1561  [&](IRBuilderBase &Builder, Value *Loaded) {
1562  return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1563  AI->getValOperand());
1564  },
1565  CreateCmpXchg);
1566 
1567  AI->replaceAllUsesWith(Loaded);
1568  AI->eraseFromParent();
1569  return true;
1570 }
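// Usage sketch (assumption, not taken from the source): a target with no
// native lowering for a given RMW operation can invoke this helper as
//   expandAtomicRMWToCmpXchg(RMWI, createCmpXchgInstFun);
// where createCmpXchgInstFun (defined earlier in this file) simply emits a
// plain cmpxchg instruction for each iteration of the generated loop.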
1571 
1572 // In order to use one of the sized library calls such as
1573 // __atomic_fetch_add_4, the alignment must be sufficient, the size
1574 // must be one of the potentially-specialized sizes, and the value
1575 // type must actually exist in C on the target (otherwise, the
1576 // function wouldn't actually be defined.)
1577 static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1578  const DataLayout &DL) {
1579  // TODO: "LargestSize" is an approximation for "largest type that
1580  // you can express in C". It seems to be the case that int128 is
1581  // supported on all 64-bit platforms, otherwise only up to 64-bit
1582  // integers are supported. If we get this wrong, then we'll try to
1583  // call a sized libcall that doesn't actually exist. There should
1584  // really be some more reliable way in LLVM of determining integer
1585  // sizes which are valid in the target's C ABI...
1586  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1587  return Alignment >= Size &&
1588  (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1589  Size <= LargestSize;
1590 }
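// Worked example (illustrative): on a typical 64-bit target where
// getLargestLegalIntTypeSizeInBits() >= 64, LargestSize is 16, so an 8-byte
// operation aligned to at least 8 bytes may use a sized call such as
// __atomic_fetch_add_8, whereas a 4-byte operation that is only 2-byte
// aligned fails the "Alignment >= Size" test and must use the generic,
// pointer-based __atomic_* entry points instead.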
1591 
1592 void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
1593  static const RTLIB::Libcall Libcalls[6] = {
1594  RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1595  RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1596  unsigned Size = getAtomicOpSize(I);
1597 
1598  bool expanded = expandAtomicOpToLibcall(
1599  I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1600  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1601  if (!expanded)
1602  report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
1603 }
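// Illustration (assumed IR shapes, not from the source): depending on
// canUseSizedAtomicCall, a "load atomic i32, ptr %p seq_cst" handled here
// becomes roughly either
//   %v = call i32 @__atomic_load_4(ptr %p, i32 5)            ; sized form
// or
//   call void @__atomic_load(i64 4, ptr %p, ptr %tmp, i32 5) ; generic form
// where 5 is the C ABI encoding of seq_cst and %tmp is a stack slot from
// which the result is loaded back.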
1604 
1605 void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
1606  static const RTLIB::Libcall Libcalls[6] = {
1607  RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1608  RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1609  unsigned Size = getAtomicOpSize(I);
1610 
1611  bool expanded = expandAtomicOpToLibcall(
1612  I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1613  nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1614  if (!expanded)
1615  report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
1616 }
1617 
1618 void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1619  static const RTLIB::Libcall Libcalls[6] = {
1620  RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1621  RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1622  RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1623  unsigned Size = getAtomicOpSize(I);
1624 
1625  bool expanded = expandAtomicOpToLibcall(
1626  I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1627  I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1628  Libcalls);
1629  if (!expanded)
1630  report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
1631 }
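// Illustration (assumed IR shape, not from the source): a cmpxchg that cannot
// use a sized call ends up as roughly
//   %ok = call zeroext i1 @__atomic_compare_exchange(i64 %size, ptr %p,
//             ptr %expected.slot, ptr %desired.slot, i32 5, i32 2)
// with the loaded value read back out of %expected.slot afterwards; see
// expandAtomicOpToLibcall below for how the {iN, i1} result is rebuilt.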
1632 
 1633 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
 1634  static const RTLIB::Libcall LibcallsXchg[6] = {
1635  RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1636  RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1637  RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1638  static const RTLIB::Libcall LibcallsAdd[6] = {
1639  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1640  RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1641  RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1642  static const RTLIB::Libcall LibcallsSub[6] = {
1643  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1644  RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1645  RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1646  static const RTLIB::Libcall LibcallsAnd[6] = {
1647  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1648  RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1649  RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1650  static const RTLIB::Libcall LibcallsOr[6] = {
1651  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1652  RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1653  RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1654  static const RTLIB::Libcall LibcallsXor[6] = {
1655  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1656  RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1657  RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1658  static const RTLIB::Libcall LibcallsNand[6] = {
1659  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1660  RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1661  RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1662 
1663  switch (Op) {
 1664  case AtomicRMWInst::BAD_BINOP:
 1665  llvm_unreachable("Should not have BAD_BINOP.");
1666  case AtomicRMWInst::Xchg:
1667  return makeArrayRef(LibcallsXchg);
1668  case AtomicRMWInst::Add:
1669  return makeArrayRef(LibcallsAdd);
1670  case AtomicRMWInst::Sub:
1671  return makeArrayRef(LibcallsSub);
1672  case AtomicRMWInst::And:
1673  return makeArrayRef(LibcallsAnd);
1674  case AtomicRMWInst::Or:
1675  return makeArrayRef(LibcallsOr);
1676  case AtomicRMWInst::Xor:
1677  return makeArrayRef(LibcallsXor);
1678  case AtomicRMWInst::Nand:
1679  return makeArrayRef(LibcallsNand);
1680  case AtomicRMWInst::Max:
1681  case AtomicRMWInst::Min:
1682  case AtomicRMWInst::UMax:
1683  case AtomicRMWInst::UMin:
1684  case AtomicRMWInst::FMax:
1685  case AtomicRMWInst::FMin:
1686  case AtomicRMWInst::FAdd:
1687  case AtomicRMWInst::FSub:
 1688  // No atomic libcalls are available for max/min/umax/umin or the FP operations.
1689  return {};
1690  }
1691  llvm_unreachable("Unexpected AtomicRMW operation.");
1692 }
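// Example of the mapping above (illustrative): GetRMWLibcall(AtomicRMWInst::Add)
// returns {UNKNOWN_LIBCALL, ATOMIC_FETCH_ADD_1, _2, _4, _8, _16}; slot 0 is
// the generic entry, and UNKNOWN_LIBCALL there records that no generic
// __atomic_fetch_add exists, only the size-specialized forms. Of the
// operations handled here, only Xchg has a usable generic entry
// (__atomic_exchange).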
1693 
1694 void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1695  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1696 
1697  unsigned Size = getAtomicOpSize(I);
1698 
1699  bool Success = false;
1700  if (!Libcalls.empty())
1701  Success = expandAtomicOpToLibcall(
1702  I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1703  nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1704 
1705  // The expansion failed: either there were no libcalls at all for
1706  // the operation (min/max), or there were only size-specialized
1707  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1708  // CAS libcall, via a CAS loop, instead.
1709  if (!Success) {
 1710  expandAtomicRMWToCmpXchg(
 1711  I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1712  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1713  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
1714  // Create the CAS instruction normally...
1715  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1716  Addr, Loaded, NewVal, Alignment, MemOpOrder,
 1717  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
 1718  Success = Builder.CreateExtractValue(Pair, 1, "success");
1719  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1720 
1721  // ...and then expand the CAS into a libcall.
1722  expandAtomicCASToLibcall(Pair);
1723  });
1724  }
1725 }
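// Worked example (illustrative): an "atomicrmw max ptr %p, i32 %v seq_cst"
// has no __atomic_fetch_* libcall at any size, so the fallback above rewrites
// it as a compare-exchange loop; each cmpxchg inside that loop is then itself
// expanded into an @__atomic_compare_exchange call by expandAtomicCASToLibcall.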
1726 
1727 // A helper routine for the above expandAtomic*ToLibcall functions.
1728 //
1729 // 'Libcalls' contains an array of enum values for the particular
1730 // ATOMIC libcalls to be emitted. All of the other arguments besides
1731 // 'I' are extracted from the Instruction subclass by the
1732 // caller. Depending on the particular call, some will be null.
1733 bool AtomicExpand::expandAtomicOpToLibcall(
1734  Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1735  Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1736  AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1737  assert(Libcalls.size() == 6);
1738 
1739  LLVMContext &Ctx = I->getContext();
1740  Module *M = I->getModule();
1741  const DataLayout &DL = M->getDataLayout();
 1742  IRBuilder<> Builder(I);
 1743  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1744 
1745  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1746  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1747 
1748  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1749 
1750  // TODO: the "order" argument type is "int", not int32. So
1751  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1752  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1753  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1754  Constant *OrderingVal =
1755  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1756  Constant *Ordering2Val = nullptr;
1757  if (CASExpected) {
1758  assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1759  Ordering2Val =
1760  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1761  }
1762  bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1763 
1764  RTLIB::Libcall RTLibType;
1765  if (UseSizedLibcall) {
1766  switch (Size) {
1767  case 1:
1768  RTLibType = Libcalls[1];
1769  break;
1770  case 2:
1771  RTLibType = Libcalls[2];
1772  break;
1773  case 4:
1774  RTLibType = Libcalls[3];
1775  break;
1776  case 8:
1777  RTLibType = Libcalls[4];
1778  break;
1779  case 16:
1780  RTLibType = Libcalls[5];
1781  break;
1782  }
1783  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1784  RTLibType = Libcalls[0];
1785  } else {
1786  // Can't use sized function, and there's no generic for this
1787  // operation, so give up.
1788  return false;
1789  }
1790 
1791  if (!TLI->getLibcallName(RTLibType)) {
1792  // This target does not implement the requested atomic libcall so give up.
1793  return false;
1794  }
1795 
 1796  // Build up the function call. There are two kinds. First, the sized
1797  // variants. These calls are going to be one of the following (with
1798  // N=1,2,4,8,16):
1799  // iN __atomic_load_N(iN *ptr, int ordering)
1800  // void __atomic_store_N(iN *ptr, iN val, int ordering)
1801  // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1802  // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1803  // int success_order, int failure_order)
1804  //
1805  // Note that these functions can be used for non-integer atomic
1806  // operations, the values just need to be bitcast to integers on the
1807  // way in and out.
1808  //
1809  // And, then, the generic variants. They look like the following:
1810  // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1811  // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1812  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1813  // int ordering)
1814  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1815  // void *desired, int success_order,
1816  // int failure_order)
1817  //
1818  // The different signatures are built up depending on the
1819  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1820  // variables.
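// Illustration (assumed IR, not from the source): for an
// "atomicrmw add ptr %p, i32 1 seq_cst" small and aligned enough for a sized
// call, the result is roughly
//   %old = call i32 @__atomic_fetch_add_4(ptr %p, i32 1, i32 5)
// whereas a generic call (e.g. __atomic_exchange on an unusually sized type)
// passes the value operand and the result through stack slots, which is
// exactly what the alloca handling below sets up.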
1821 
1822  AllocaInst *AllocaCASExpected = nullptr;
1823  Value *AllocaCASExpected_i8 = nullptr;
1824  AllocaInst *AllocaValue = nullptr;
1825  Value *AllocaValue_i8 = nullptr;
1826  AllocaInst *AllocaResult = nullptr;
1827  Value *AllocaResult_i8 = nullptr;
1828 
1829  Type *ResultTy;
 1830  SmallVector<Value *, 6> Args;
 1831  AttributeList Attr;
1832 
1833  // 'size' argument.
1834  if (!UseSizedLibcall) {
1835  // Note, getIntPtrType is assumed equivalent to size_t.
1836  Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1837  }
1838 
1839  // 'ptr' argument.
1840  // note: This assumes all address spaces share a common libfunc
 1841  // implementation and that addresses are convertible. For systems without
1842  // that property, we'd need to extend this mechanism to support AS-specific
1843  // families of atomic intrinsics.
1844  auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
1845  Value *PtrVal =
1846  Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx, PtrTypeAS));
1847  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
1848  Args.push_back(PtrVal);
1849 
1850  // 'expected' argument, if present.
1851  if (CASExpected) {
1852  AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1853  AllocaCASExpected->setAlignment(AllocaAlignment);
1854  unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
1855 
1856  AllocaCASExpected_i8 = Builder.CreateBitCast(
1857  AllocaCASExpected, Type::getInt8PtrTy(Ctx, AllocaAS));
1858  Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
1859  Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1860  Args.push_back(AllocaCASExpected_i8);
1861  }
1862 
1863  // 'val' argument ('desired' for cas), if present.
1864  if (ValueOperand) {
1865  if (UseSizedLibcall) {
1866  Value *IntValue =
1867  Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1868  Args.push_back(IntValue);
1869  } else {
1870  AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1871  AllocaValue->setAlignment(AllocaAlignment);
1872  AllocaValue_i8 =
1873  Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
1874  Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
1875  Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1876  Args.push_back(AllocaValue_i8);
1877  }
1878  }
1879 
1880  // 'ret' argument.
1881  if (!CASExpected && HasResult && !UseSizedLibcall) {
1882  AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1883  AllocaResult->setAlignment(AllocaAlignment);
1884  unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
1885  AllocaResult_i8 =
1886  Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
1887  Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
1888  Args.push_back(AllocaResult_i8);
1889  }
1890 
1891  // 'ordering' ('success_order' for cas) argument.
1892  Args.push_back(OrderingVal);
1893 
1894  // 'failure_order' argument, if present.
1895  if (Ordering2Val)
1896  Args.push_back(Ordering2Val);
1897 
1898  // Now, the return type.
1899  if (CASExpected) {
1900  ResultTy = Type::getInt1Ty(Ctx);
1901  Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
1902  } else if (HasResult && UseSizedLibcall)
1903  ResultTy = SizedIntTy;
1904  else
1905  ResultTy = Type::getVoidTy(Ctx);
1906 
1907  // Done with setting up arguments and return types, create the call:
1908  SmallVector<Type *, 6> ArgTys;
1909  for (Value *Arg : Args)
1910  ArgTys.push_back(Arg->getType());
1911  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1912  FunctionCallee LibcallFn =
1913  M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1914  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1915  Call->setAttributes(Attr);
1916  Value *Result = Call;
1917 
1918  // And then, extract the results...
1919  if (ValueOperand && !UseSizedLibcall)
1920  Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
1921 
1922  if (CASExpected) {
1923  // The final result from the CAS is {load of 'expected' alloca, bool result
1924  // from call}
1925  Type *FinalResultTy = I->getType();
1926  Value *V = PoisonValue::get(FinalResultTy);
1927  Value *ExpectedOut = Builder.CreateAlignedLoad(
1928  CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
1929  Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
1930  V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1931  V = Builder.CreateInsertValue(V, Result, 1);
1932  I->replaceAllUsesWith(V);
1933  } else if (HasResult) {
1934  Value *V;
1935  if (UseSizedLibcall)
1936  V = Builder.CreateBitOrPointerCast(Result, I->getType());
1937  else {
1938  V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
1939  AllocaAlignment);
1940  Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
1941  }
1942  I->replaceAllUsesWith(V);
1943  }
1944  I->eraseFromParent();
1945  return true;
1946 }