1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target-specific instructions which implement the
11// same semantics in a way that better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
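//
// For illustration (a rough sketch, not part of the upstream text): on a
// target that requests CmpXChg expansion, an RMW such as
//
//   %old = atomicrmw add ptr %p, i32 1 seq_cst
//
// is rewritten by this pass into a compare-and-swap loop of roughly this
// shape (see insertRMWCmpXchgLoop below for the exact form):
//
//   %init = load i32, ptr %p, align 4
//   br label %atomicrmw.start
// atomicrmw.start:
//   %loaded = phi i32 [ %init, %entry ], [ %new_loaded, %atomicrmw.start ]
//   %new = add i32 %loaded, 1
//   %pair = cmpxchg ptr %p, i32 %loaded, i32 %new seq_cst seq_cst
//   %new_loaded = extractvalue { i32, i1 } %pair, 0
//   %success = extractvalue { i32, i1 } %pair, 1
//   br i1 %success, label %atomicrmw.end, label %atomicrmw.start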
16
17#include "llvm/ADT/ArrayRef.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constant.h"
32#include "llvm/IR/Constants.h"
33#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/Instruction.h"
41#include "llvm/IR/Module.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
46#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const DataLayout *DL = nullptr;
67
68private:
69 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
70 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
71 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
72 bool tryExpandAtomicLoad(LoadInst *LI);
73 bool expandAtomicLoadToLL(LoadInst *LI);
74 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
75 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
76 bool tryExpandAtomicStore(StoreInst *SI);
77 void expandAtomicStore(StoreInst *SI);
78 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
79 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
80 Value *
81 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
82 Align AddrAlign, AtomicOrdering MemOpOrder,
83 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
84 void expandAtomicOpToLLSC(
85 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
86 AtomicOrdering MemOpOrder,
87 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
88 void expandPartwordAtomicRMW(
89 AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
90 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
91 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
92 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
93 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
94
95 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
96 static Value *insertRMWCmpXchgLoop(
97 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
98 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
99 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
100 CreateCmpXchgInstFun CreateCmpXchg);
101 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
102
103 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
104 bool isIdempotentRMW(AtomicRMWInst *RMWI);
105 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
106
107 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
108 Value *PointerOperand, Value *ValueOperand,
109 Value *CASExpected, AtomicOrdering Ordering,
110 AtomicOrdering Ordering2,
111 ArrayRef<RTLIB::Libcall> Libcalls);
112 void expandAtomicLoadToLibcall(LoadInst *LI);
113 void expandAtomicStoreToLibcall(StoreInst *LI);
114 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
115 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
116
117 friend bool
118 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
119 CreateCmpXchgInstFun CreateCmpXchg);
120
121public:
122 bool run(Function &F, const TargetMachine *TM);
123};
124
125class AtomicExpandLegacy : public FunctionPass {
126public:
127 static char ID; // Pass identification, replacement for typeid
128
129 AtomicExpandLegacy() : FunctionPass(ID) {
130 initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
131 }
132
133 bool runOnFunction(Function &F) override;
134};
135
136// IRBuilder to be used for replacement atomic instructions.
137struct ReplacementIRBuilder
138 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
139 MDNode *MMRAMD = nullptr;
140
141 // Preserves the DebugLoc from I, and preserves still valid metadata.
142 // Enable StrictFP builder mode when appropriate.
143 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
144 : IRBuilder(I->getContext(), DL,
145 IRBuilderCallbackInserter(
146 [this](Instruction *I) { addMMRAMD(I); })) {
147 SetInsertPoint(I);
148 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
149 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
150 this->setIsFPConstrained(true);
151
152 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
153 }
154
155 void addMMRAMD(Instruction *I) {
156 if (MMRAMD)
157 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
158 }
159};
160
161} // end anonymous namespace
162
163char AtomicExpandLegacy::ID = 0;
164
165char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
166
167INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
168 "Expand Atomic instructions", false, false)
170INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
171 "Expand Atomic instructions", false, false)
172
173// Helper functions to retrieve the size of atomic instructions.
174static unsigned getAtomicOpSize(LoadInst *LI) {
175 const DataLayout &DL = LI->getModule()->getDataLayout();
176 return DL.getTypeStoreSize(LI->getType());
177}
178
179static unsigned getAtomicOpSize(StoreInst *SI) {
180 const DataLayout &DL = SI->getModule()->getDataLayout();
181 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
182}
183
184static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
185 const DataLayout &DL = RMWI->getModule()->getDataLayout();
186 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
187}
188
189static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
190 const DataLayout &DL = CASI->getModule()->getDataLayout();
191 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
192}
193
194// Determine if a particular atomic operation has a supported size,
195// and is of appropriate alignment, to be passed through for target
196// lowering. (Versus turning into a __atomic libcall)
197template <typename Inst>
198static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
199 unsigned Size = getAtomicOpSize(I);
200 Align Alignment = I->getAlign();
201 return Alignment >= Size &&
202 Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
203}
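// For example (illustrative, assuming a target whose
// getMaxAtomicSizeInBitsSupported() is 64): a naturally aligned
// "load atomic i32" passes this check, while an i128 cmpxchg or an i32 atomic
// with only 2-byte alignment fails it and is routed to the __atomic_* libcall
// path in run() below.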
204
205bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
206 const auto *Subtarget = TM->getSubtargetImpl(F);
207 if (!Subtarget->enableAtomicExpand())
208 return false;
209 TLI = Subtarget->getTargetLowering();
210 DL = &F.getParent()->getDataLayout();
211
212 SmallVector<Instruction *, 1> AtomicInsts;
213
214 // Changing control-flow while iterating through it is a bad idea, so gather a
215 // list of all atomic instructions before we start.
216 for (Instruction &I : instructions(F))
217 if (I.isAtomic() && !isa<FenceInst>(&I))
218 AtomicInsts.push_back(&I);
219
220 bool MadeChange = false;
221 for (auto *I : AtomicInsts) {
222 auto LI = dyn_cast<LoadInst>(I);
223 auto SI = dyn_cast<StoreInst>(I);
224 auto RMWI = dyn_cast<AtomicRMWInst>(I);
225 auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
226 assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
227
228 // If the Size/Alignment is not supported, replace with a libcall.
229 if (LI) {
230 if (!atomicSizeSupported(TLI, LI)) {
231 expandAtomicLoadToLibcall(LI);
232 MadeChange = true;
233 continue;
234 }
235 } else if (SI) {
236 if (!atomicSizeSupported(TLI, SI)) {
237 expandAtomicStoreToLibcall(SI);
238 MadeChange = true;
239 continue;
240 }
241 } else if (RMWI) {
242 if (!atomicSizeSupported(TLI, RMWI)) {
243 expandAtomicRMWToLibcall(RMWI);
244 MadeChange = true;
245 continue;
246 }
247 } else if (CASI) {
248 if (!atomicSizeSupported(TLI, CASI)) {
249 expandAtomicCASToLibcall(CASI);
250 MadeChange = true;
251 continue;
252 }
253 }
254
255 if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
256 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
257 I = LI = convertAtomicLoadToIntegerType(LI);
258 MadeChange = true;
259 } else if (SI &&
260 TLI->shouldCastAtomicStoreInIR(SI) ==
261 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
262 I = SI = convertAtomicStoreToIntegerType(SI);
263 MadeChange = true;
264 } else if (RMWI &&
265 TLI->shouldCastAtomicRMWIInIR(RMWI) ==
266 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
267 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
268 MadeChange = true;
269 } else if (CASI) {
270 // TODO: when we're ready to make the change at the IR level, we can
271 // extend convertCmpXchgToInteger for floating point too.
272 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
273 // TODO: add a TLI hook to control this so that each target can
274 // convert to lowering the original type one at a time.
275 I = CASI = convertCmpXchgToIntegerType(CASI);
276 MadeChange = true;
277 }
278 }
279
280 if (TLI->shouldInsertFencesForAtomic(I)) {
281 auto FenceOrdering = AtomicOrdering::Monotonic;
282 if (LI && isAcquireOrStronger(LI->getOrdering())) {
283 FenceOrdering = LI->getOrdering();
284 LI->setOrdering(AtomicOrdering::Monotonic);
285 } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
286 FenceOrdering = SI->getOrdering();
287 SI->setOrdering(AtomicOrdering::Monotonic);
288 } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
289 isAcquireOrStronger(RMWI->getOrdering()))) {
290 FenceOrdering = RMWI->getOrdering();
291 RMWI->setOrdering(AtomicOrdering::Monotonic);
292 } else if (CASI &&
293 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
294 TargetLoweringBase::AtomicExpansionKind::None &&
295 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
296 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
297 isAcquireOrStronger(CASI->getFailureOrdering()))) {
298 // If a compare and swap is lowered to LL/SC, we can do smarter fence
299 // insertion, with a stronger one on the success path than on the
300 // failure path. As a result, fence insertion is directly done by
301 // expandAtomicCmpXchg in that case.
302 FenceOrdering = CASI->getMergedOrdering();
303 CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
304 CASI->setFailureOrdering(AtomicOrdering::Monotonic);
305 }
306
307 if (FenceOrdering != AtomicOrdering::Monotonic) {
308 MadeChange |= bracketInstWithFences(I, FenceOrdering);
309 }
310 } else if (I->hasAtomicStore() &&
311 TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
312 auto FenceOrdering = AtomicOrdering::Monotonic;
313 if (SI)
314 FenceOrdering = SI->getOrdering();
315 else if (RMWI)
316 FenceOrdering = RMWI->getOrdering();
317 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
318 TargetLoweringBase::AtomicExpansionKind::LLSC)
319 // LLSC is handled in expandAtomicCmpXchg().
320 FenceOrdering = CASI->getSuccessOrdering();
321
322 IRBuilder Builder(I);
323 if (auto TrailingFence =
324 TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
325 TrailingFence->moveAfter(I);
326 MadeChange = true;
327 }
328 }
329
330 if (LI)
331 MadeChange |= tryExpandAtomicLoad(LI);
332 else if (SI)
333 MadeChange |= tryExpandAtomicStore(SI);
334 else if (RMWI) {
335 // There are two different ways of expanding RMW instructions:
336 // - into a load if it is idempotent
337 // - into a Cmpxchg/LL-SC loop otherwise
338 // we try them in that order.
339
340 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
341 MadeChange = true;
342 } else {
343 MadeChange |= tryExpandAtomicRMW(RMWI);
344 }
345 } else if (CASI)
346 MadeChange |= tryExpandAtomicCmpXchg(CASI);
347 }
348 return MadeChange;
349}
350
351bool AtomicExpandLegacy::runOnFunction(Function &F) {
352
353 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
354 if (!TPC)
355 return false;
356 auto *TM = &TPC->getTM<TargetMachine>();
357 AtomicExpandImpl AE;
358 return AE.run(F, TM);
359}
360
361FunctionPass *llvm::createAtomicExpandLegacyPass() {
362 return new AtomicExpandLegacy();
363}
364
365PreservedAnalyses AtomicExpandPass::run(Function &F,
366 FunctionAnalysisManager &AM) {
367 AtomicExpandImpl AE;
368
369 bool Changed = AE.run(F, TM);
370 if (!Changed)
371 return PreservedAnalyses::all();
372
373 return PreservedAnalyses::none();
374}
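// A rough usage sketch (the header name and constructor shape are assumed
// from the run() method above rather than spelled out in this file):
//
//   #include "llvm/CodeGen/AtomicExpand.h"
//   #include "llvm/Passes/PassBuilder.h"
//
//   void expandAtomicsIn(Function &F, const TargetMachine *TM) {
//     FunctionAnalysisManager FAM;
//     PassBuilder PB;
//     PB.registerFunctionAnalyses(FAM);
//     FunctionPassManager FPM;
//     FPM.addPass(AtomicExpandPass(TM));
//     FPM.run(F, FAM);
//   }
//
// Legacy pass-manager clients instead schedule the pass via
// createAtomicExpandLegacyPass() above.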
375
376bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
377 AtomicOrdering Order) {
378 ReplacementIRBuilder Builder(I, *DL);
379
380 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
381
382 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
383 // We have a guard here because not every atomic operation generates a
384 // trailing fence.
385 if (TrailingFence)
386 TrailingFence->moveAfter(I);
387
388 return (LeadingFence || TrailingFence);
389}
390
391/// Get the iX type with the same bitwidth as T.
392IntegerType *
393AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
394 EVT VT = TLI->getMemValueType(DL, T);
395 unsigned BitWidth = VT.getStoreSizeInBits();
396 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
397 return IntegerType::get(T->getContext(), BitWidth);
398}
399
400/// Convert an atomic load of a non-integral type to an integer load of the
401/// equivalent bitwidth. See the function comment on
402/// convertAtomicStoreToIntegerType for background.
403LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
404 auto *M = LI->getModule();
405 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
406
407 ReplacementIRBuilder Builder(LI, *DL);
408
409 Value *Addr = LI->getPointerOperand();
410
411 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
412 NewLI->setAlignment(LI->getAlign());
413 NewLI->setVolatile(LI->isVolatile());
414 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
415 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
416
417 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
418 LI->replaceAllUsesWith(NewVal);
419 LI->eraseFromParent();
420 return NewLI;
421}
422
423AtomicRMWInst *
424AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
425 auto *M = RMWI->getModule();
426 Type *NewTy =
427 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
428
429 ReplacementIRBuilder Builder(RMWI, *DL);
430
431 Value *Addr = RMWI->getPointerOperand();
432 Value *Val = RMWI->getValOperand();
433 Value *NewVal = Val->getType()->isPointerTy()
434 ? Builder.CreatePtrToInt(Val, NewTy)
435 : Builder.CreateBitCast(Val, NewTy);
436
437 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
438 RMWI->getAlign(), RMWI->getOrdering(),
439 RMWI->getSyncScopeID());
440 NewRMWI->setVolatile(RMWI->isVolatile());
441 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
442
443 Value *NewRVal = RMWI->getType()->isPointerTy()
444 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
445 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
446 RMWI->replaceAllUsesWith(NewRVal);
447 RMWI->eraseFromParent();
448 return NewRMWI;
449}
450
451bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
452 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
453 case TargetLoweringBase::AtomicExpansionKind::None:
454 return false;
455 case TargetLoweringBase::AtomicExpansionKind::LLSC:
456 expandAtomicOpToLLSC(
457 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
458 LI->getOrdering(),
459 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
460 return true;
461 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
462 return expandAtomicLoadToLL(LI);
463 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
464 return expandAtomicLoadToCmpXchg(LI);
465 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
466 LI->setAtomic(AtomicOrdering::NotAtomic);
467 return true;
468 default:
469 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
470 }
471}
472
473bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
474 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
475 case TargetLoweringBase::AtomicExpansionKind::None:
476 return false;
477 case TargetLoweringBase::AtomicExpansionKind::Expand:
478 expandAtomicStore(SI);
479 return true;
480 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
481 SI->setAtomic(AtomicOrdering::NotAtomic);
482 return true;
483 default:
484 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
485 }
486}
487
488bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
489 ReplacementIRBuilder Builder(LI, *DL);
490
491 // On some architectures, load-linked instructions are atomic for larger
492 // sizes than normal loads. For example, the only 64-bit load guaranteed
493 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
494 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
495 LI->getPointerOperand(), LI->getOrdering());
496 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
497
498 LI->replaceAllUsesWith(Val);
499 LI->eraseFromParent();
500
501 return true;
502}
503
504bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
505 ReplacementIRBuilder Builder(LI, *DL);
506 AtomicOrdering Order = LI->getOrdering();
507 if (Order == AtomicOrdering::Unordered)
508 Order = AtomicOrdering::Monotonic;
509
510 Value *Addr = LI->getPointerOperand();
511 Type *Ty = LI->getType();
512 Constant *DummyVal = Constant::getNullValue(Ty);
513
514 Value *Pair = Builder.CreateAtomicCmpXchg(
515 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
516 AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
517 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
518
519 LI->replaceAllUsesWith(Loaded);
520 LI->eraseFromParent();
521
522 return true;
523}
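// Illustrative result (a sketch): "%v = load atomic i32, ptr %p acquire"
// becomes roughly
//   %pair = cmpxchg ptr %p, i32 0, i32 0 acquire acquire
//   %v    = extractvalue { i32, i1 } %pair, 0
// i.e. a compare-exchange of zero with zero, which never changes the stored
// value but still returns the current contents atomically.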
524
525/// Convert an atomic store of a non-integral type to an integer store of the
526/// equivalent bitwidth. We used to not support floating point or vector
527/// atomics in the IR at all. The backends learned to deal with the bitcast
528/// idiom because that was the only way of expressing the notion of an atomic
529/// float or vector store. The long term plan is to teach each backend to
530/// instruction select from the original atomic store, but as a migration
531/// mechanism, we convert back to the old format which the backends understand.
532/// Each backend will need individual work to recognize the new format.
533StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
534 ReplacementIRBuilder Builder(SI, *DL);
535 auto *M = SI->getModule();
536 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
537 M->getDataLayout());
538 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
539
540 Value *Addr = SI->getPointerOperand();
541
542 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
543 NewSI->setAlignment(SI->getAlign());
544 NewSI->setVolatile(SI->isVolatile());
545 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
546 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
547 SI->eraseFromParent();
548 return NewSI;
549}
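// Illustrative result (a sketch): a non-integral store such as
//   store atomic float %f, ptr %p release, align 4
// is rewritten into the integer idiom the backends already understand:
//   %bits = bitcast float %f to i32
//   store atomic i32 %bits, ptr %p release, align 4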
550
551void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
552 // This function is only called on atomic stores that are too large to be
553 // atomic if implemented as a native store. So we replace them by an
554 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
555 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
556 // It is the responsibility of the target to only signal expansion via
557 // shouldExpandAtomicRMW in cases where this is required and possible.
558 ReplacementIRBuilder Builder(SI, *DL);
559 AtomicOrdering Ordering = SI->getOrdering();
560 assert(Ordering != AtomicOrdering::NotAtomic);
561 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
562 ? AtomicOrdering::Monotonic
563 : Ordering;
564 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
565 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
566 SI->getAlign(), RMWOrdering);
567 SI->eraseFromParent();
568
569 // Now we have an appropriate swap instruction, lower it as usual.
570 tryExpandAtomicRMW(AI);
571}
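// Illustrative result (a sketch): with this expansion,
//   store atomic i64 %v, ptr %p seq_cst, align 8
// becomes
//   atomicrmw xchg ptr %p, i64 %v seq_cst
// whose (unused) result is simply discarded; tryExpandAtomicRMW then lowers
// the xchg itself, e.g. into an LL/SC loop.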
572
573static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
574 Value *Loaded, Value *NewVal, Align AddrAlign,
575 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
576 Value *&Success, Value *&NewLoaded) {
577 Type *OrigTy = NewVal->getType();
578
579 // This code can go away when cmpxchg supports FP and vector types.
580 assert(!OrigTy->isPointerTy());
581 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
582 if (NeedBitcast) {
583 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
584 NewVal = Builder.CreateBitCast(NewVal, IntTy);
585 Loaded = Builder.CreateBitCast(Loaded, IntTy);
586 }
587
588 Value *Pair = Builder.CreateAtomicCmpXchg(
589 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
590 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
591 Success = Builder.CreateExtractValue(Pair, 1, "success");
592 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
593
594 if (NeedBitcast)
595 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
596}
597
598bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
599 LLVMContext &Ctx = AI->getModule()->getContext();
600 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
601 switch (Kind) {
602 case TargetLoweringBase::AtomicExpansionKind::None:
603 return false;
604 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
605 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
606 unsigned ValueSize = getAtomicOpSize(AI);
607 if (ValueSize < MinCASSize) {
608 expandPartwordAtomicRMW(AI,
609 TargetLoweringBase::AtomicExpansionKind::LLSC);
610 } else {
611 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
612 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
613 AI->getValOperand());
614 };
615 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
616 AI->getAlign(), AI->getOrdering(), PerformOp);
617 }
618 return true;
619 }
620 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
621 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
622 unsigned ValueSize = getAtomicOpSize(AI);
623 if (ValueSize < MinCASSize) {
624 expandPartwordAtomicRMW(AI,
625 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
626 } else {
627 SmallVector<StringRef> SSNs;
628 Ctx.getSyncScopeNames(SSNs);
629 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
630 ? "system"
631 : SSNs[AI->getSyncScopeID()];
632 OptimizationRemarkEmitter ORE(AI->getFunction());
633 ORE.emit([&]() {
634 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
635 << "A compare and swap loop was generated for an atomic "
636 << AI->getOperationName(AI->getOperation()) << " operation at "
637 << MemScope << " memory scope";
638 });
639 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
640 }
641 return true;
642 }
643 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
644 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
645 unsigned ValueSize = getAtomicOpSize(AI);
646 if (ValueSize < MinCASSize) {
647 AtomicRMWInst::BinOp Op = AI->getOperation();
648 // Widen And/Or/Xor and give the target another chance at expanding it.
649 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
650 Op == AtomicRMWInst::And) {
651 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
652 return true;
653 }
654 }
655 expandAtomicRMWToMaskedIntrinsic(AI);
656 return true;
657 }
658 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
659 TLI->emitBitTestAtomicRMWIntrinsic(AI);
660 return true;
661 }
662 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
663 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
664 return true;
665 }
666 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
667 return lowerAtomicRMWInst(AI);
668 case TargetLoweringBase::AtomicExpansionKind::Expand:
669 TLI->emitExpandAtomicRMW(AI);
670 return true;
671 default:
672 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
673 }
674}
675
676namespace {
677
678struct PartwordMaskValues {
679 // These three fields are guaranteed to be set by createMaskInstrs.
680 Type *WordType = nullptr;
681 Type *ValueType = nullptr;
682 Type *IntValueType = nullptr;
683 Value *AlignedAddr = nullptr;
684 Align AlignedAddrAlignment;
685 // The remaining fields can be null.
686 Value *ShiftAmt = nullptr;
687 Value *Mask = nullptr;
688 Value *Inv_Mask = nullptr;
689};
690
691LLVM_ATTRIBUTE_UNUSED
692raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
693 auto PrintObj = [&O](auto *V) {
694 if (V)
695 O << *V;
696 else
697 O << "nullptr";
698 O << '\n';
699 };
700 O << "PartwordMaskValues {\n";
701 O << " WordType: ";
702 PrintObj(PMV.WordType);
703 O << " ValueType: ";
704 PrintObj(PMV.ValueType);
705 O << " AlignedAddr: ";
706 PrintObj(PMV.AlignedAddr);
707 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
708 O << " ShiftAmt: ";
709 PrintObj(PMV.ShiftAmt);
710 O << " Mask: ";
711 PrintObj(PMV.Mask);
712 O << " Inv_Mask: ";
713 PrintObj(PMV.Inv_Mask);
714 O << "}\n";
715 return O;
716}
717
718} // end anonymous namespace
719
720/// This is a helper function which builds instructions to provide
721/// values necessary for partword atomic operations. It takes an
722/// incoming address, Addr, and ValueType, and constructs the address,
723/// shift-amounts and masks needed to work with a larger value of size
724/// WordSize.
725///
726/// AlignedAddr: Addr rounded down to a multiple of WordSize
727///
728/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
729/// from AlignAddr for it to have the same value as if
730/// ValueType was loaded from Addr.
731///
732/// Mask: Value to mask with the value loaded from AlignAddr to
733/// include only the part that would've been loaded from Addr.
734///
735/// Inv_Mask: The inverse of Mask.
736static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
737 Instruction *I, Type *ValueType,
738 Value *Addr, Align AddrAlign,
739 unsigned MinWordSize) {
740 PartwordMaskValues PMV;
741
742 Module *M = I->getModule();
743 LLVMContext &Ctx = M->getContext();
744 const DataLayout &DL = M->getDataLayout();
745 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
746
747 PMV.ValueType = PMV.IntValueType = ValueType;
748 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
749 PMV.IntValueType =
750 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
751
752 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
753 : ValueType;
754 if (PMV.ValueType == PMV.WordType) {
755 PMV.AlignedAddr = Addr;
756 PMV.AlignedAddrAlignment = AddrAlign;
757 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
758 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
759 return PMV;
760 }
761
762 PMV.AlignedAddrAlignment = Align(MinWordSize);
763
764 assert(ValueSize < MinWordSize);
765
766 PointerType *PtrTy = cast<PointerType>(Addr->getType());
767 IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
768 Value *PtrLSB;
769
770 if (AddrAlign < MinWordSize) {
771 PMV.AlignedAddr = Builder.CreateIntrinsic(
772 Intrinsic::ptrmask, {PtrTy, IntTy},
773 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
774 "AlignedAddr");
775
776 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
777 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
778 } else {
779 // If the alignment is high enough, the LSB are known 0.
780 PMV.AlignedAddr = Addr;
781 PtrLSB = ConstantInt::getNullValue(IntTy);
782 }
783
784 if (DL.isLittleEndian()) {
785 // turn bytes into bits
786 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
787 } else {
788 // turn bytes into bits, and count from the other side.
789 PMV.ShiftAmt = Builder.CreateShl(
790 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
791 }
792
793 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
794 PMV.Mask = Builder.CreateShl(
795 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
796 "Mask");
797
798 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
799
800 return PMV;
801}
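// Worked example (illustrative; little-endian, MinWordSize = 4, an i8 value
// at an arbitrarily aligned %p): the helper produces, conceptually,
//   AlignedAddr = ptrmask(%p, ~3)      ; the i32 word containing the byte
//   PtrLSB      = ptrtoint(%p) & 3     ; byte offset inside that word
//   ShiftAmt    = PtrLSB * 8           ; offset in bits
//   Mask        = 0xFF << ShiftAmt     ; bits that belong to the i8
//   Inv_Mask    = ~Mask                ; bits that belong to its neighbours
// On big-endian targets the byte offset is counted from the other end, as the
// code above shows.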
802
803static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
804 const PartwordMaskValues &PMV) {
805 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
806 if (PMV.WordType == PMV.ValueType)
807 return WideWord;
808
809 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
810 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
811 return Builder.CreateBitCast(Trunc, PMV.ValueType);
812}
813
814static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
815 Value *Updated, const PartwordMaskValues &PMV) {
816 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
817 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
818 if (PMV.WordType == PMV.ValueType)
819 return Updated;
820
821 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
822
823 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
824 Value *Shift =
825 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
826 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
827 Value *Or = Builder.CreateOr(And, Shift, "inserted");
828 return Or;
829}
830
831/// Emit IR to implement a masked version of a given atomicrmw
832/// operation. (That is, only the bits under the Mask should be
833/// affected by the operation)
834static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
835 IRBuilderBase &Builder, Value *Loaded,
836 Value *Shifted_Inc, Value *Inc,
837 const PartwordMaskValues &PMV) {
838 // TODO: update to use
839 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
840 // to merge bits from two values without requiring PMV.Inv_Mask.
841 switch (Op) {
842 case AtomicRMWInst::Xchg: {
843 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
844 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
845 return FinalVal;
846 }
847 case AtomicRMWInst::Or:
848 case AtomicRMWInst::Xor:
849 case AtomicRMWInst::And:
850 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
851 case AtomicRMWInst::Add:
852 case AtomicRMWInst::Sub:
853 case AtomicRMWInst::Nand: {
854 // The other arithmetic ops need to be masked into place.
855 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
856 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
857 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
858 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
859 return FinalVal;
860 }
861 case AtomicRMWInst::Max:
862 case AtomicRMWInst::Min:
863 case AtomicRMWInst::UMax:
864 case AtomicRMWInst::UMin:
865 case AtomicRMWInst::FAdd:
866 case AtomicRMWInst::FSub:
867 case AtomicRMWInst::FMin:
868 case AtomicRMWInst::FMax:
869 case AtomicRMWInst::UIncWrap:
870 case AtomicRMWInst::UDecWrap: {
871 // Finally, other ops will operate on the full value, so truncate down to
872 // the original size, and expand out again after doing the
873 // operation. Bitcasts will be inserted for FP values.
874 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
875 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
876 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
877 return FinalVal;
878 }
879 default:
880 llvm_unreachable("Unknown atomic op");
881 }
882}
883
884/// Expand a sub-word atomicrmw operation into an appropriate
885/// word-sized operation.
886///
887/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
888/// way as a typical atomicrmw expansion. The only difference here is
889/// that the operation inside of the loop may operate upon only a
890/// part of the value.
891void AtomicExpandImpl::expandPartwordAtomicRMW(
892 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
893 // Widen And/Or/Xor and give the target another chance at expanding it.
894 AtomicRMWInst::BinOp Op = AI->getOperation();
895 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
896 Op == AtomicRMWInst::And) {
897 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
898 return;
899 }
900 AtomicOrdering MemOpOrder = AI->getOrdering();
901 SyncScope::ID SSID = AI->getSyncScopeID();
902
903 ReplacementIRBuilder Builder(AI, *DL);
904
905 PartwordMaskValues PMV =
906 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
907 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
908
909 Value *ValOperand_Shifted = nullptr;
910 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
911 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
912 ValOperand_Shifted =
913 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
914 PMV.ShiftAmt, "ValOperand_Shifted");
915 }
916
917 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
918 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
919 AI->getValOperand(), PMV);
920 };
921
922 Value *OldResult;
923 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
924 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
925 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
926 PerformPartwordOp, createCmpXchgInstFun);
927 } else {
928 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
929 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
930 PMV.AlignedAddrAlignment, MemOpOrder,
931 PerformPartwordOp);
932 }
933
934 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
935 AI->replaceAllUsesWith(FinalOldResult);
936 AI->eraseFromParent();
937}
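// Illustrative shape of the result (a sketch): for
//   %old = atomicrmw add ptr %p, i16 %v monotonic
// on a target whose minimum cmpxchg width is 32 bits, the loop built here
// works on the containing i32 word: the loaded word is shifted/truncated down
// to i16 (extractMaskedValue), %v is added, the sum is merged back into the
// untouched neighbouring bits (insertMaskedValue), and only then is the
// word-sized cmpxchg or store-conditional attempted.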
938
939// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
940AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
941 ReplacementIRBuilder Builder(AI, *DL);
942 AtomicRMWInst::BinOp Op = AI->getOperation();
943
944 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
945 Op == AtomicRMWInst::And) &&
946 "Unable to widen operation");
947
948 PartwordMaskValues PMV =
949 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
950 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
951
952 Value *ValOperand_Shifted =
953 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
954 PMV.ShiftAmt, "ValOperand_Shifted");
955
956 Value *NewOperand;
957
958 if (Op == AtomicRMWInst::And)
959 NewOperand =
960 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
961 else
962 NewOperand = ValOperand_Shifted;
963
964 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
965 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
966 AI->getOrdering(), AI->getSyncScopeID());
967 // TODO: Preserve metadata
968
969 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
970 AI->replaceAllUsesWith(FinalOldResult);
971 AI->eraseFromParent();
972 return NewAI;
973}
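// Illustrative result (a sketch): "atomicrmw or ptr %p, i8 1" becomes
// "atomicrmw or ptr %AlignedAddr, i32 %shifted" where %shifted has zeros in
// the neighbouring bytes, which OR leaves unchanged. For "and", Inv_Mask is
// OR'd into the shifted operand so the neighbouring bytes are AND'ed with
// all-ones and therefore preserved as well.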
974
975bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
976 // The basic idea here is that we're expanding a cmpxchg of a
977 // smaller memory size up to a word-sized cmpxchg. To do this, we
978 // need to add a retry-loop for strong cmpxchg, so that
979 // modifications to other parts of the word don't cause a spurious
980 // failure.
981
982 // This generates code like the following:
983 // [[Setup mask values PMV.*]]
984 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
985 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
986 // %InitLoaded = load i32* %addr
987 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
988 // br partword.cmpxchg.loop
989 // partword.cmpxchg.loop:
990 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
991 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
992 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
993 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
994 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
995 // i32 %FullWord_NewVal success_ordering failure_ordering
996 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
997 // %Success = extractvalue { i32, i1 } %NewCI, 1
998 // br i1 %Success, label %partword.cmpxchg.end,
999 // label %partword.cmpxchg.failure
1000 // partword.cmpxchg.failure:
1001 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1002 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1003 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1004 // label %partword.cmpxchg.end
1005 // partword.cmpxchg.end:
1006 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1007 // %FinalOldVal = trunc i32 %tmp1 to i8
1008 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
1009 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
1010
1011 Value *Addr = CI->getPointerOperand();
1012 Value *Cmp = CI->getCompareOperand();
1013 Value *NewVal = CI->getNewValOperand();
1014
1015 BasicBlock *BB = CI->getParent();
1016 Function *F = BB->getParent();
1017 ReplacementIRBuilder Builder(CI, *DL);
1018 LLVMContext &Ctx = Builder.getContext();
1019
1020 BasicBlock *EndBB =
1021 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1022 auto FailureBB =
1023 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1024 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1025
1026 // The split call above "helpfully" added a branch at the end of BB
1027 // (to the wrong place).
1028 std::prev(BB->end())->eraseFromParent();
1029 Builder.SetInsertPoint(BB);
1030
1031 PartwordMaskValues PMV =
1032 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1033 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1034
1035 // Shift the incoming values over, into the right location in the word.
1036 Value *NewVal_Shifted =
1037 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1038 Value *Cmp_Shifted =
1039 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1040
1041 // Load the entire current word, and mask into place the expected and new
1042 // values
1043 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1044 InitLoaded->setVolatile(CI->isVolatile());
1045 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1046 Builder.CreateBr(LoopBB);
1047
1048 // partword.cmpxchg.loop:
1049 Builder.SetInsertPoint(LoopBB);
1050 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1051 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1052
1053 // Mask/Or the expected and new values into place in the loaded word.
1054 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1055 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1056 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1057 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1058 CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
1059 NewCI->setVolatile(CI->isVolatile());
1060 // When we're building a strong cmpxchg, we need a loop, so you
1061 // might think we could use a weak cmpxchg inside. But, using strong
1062 // allows the below comparison for ShouldContinue, and we're
1063 // expecting the underlying cmpxchg to be a machine instruction,
1064 // which is strong anyways.
1065 NewCI->setWeak(CI->isWeak());
1066
1067 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1068 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1069
1070 if (CI->isWeak())
1071 Builder.CreateBr(EndBB);
1072 else
1073 Builder.CreateCondBr(Success, EndBB, FailureBB);
1074
1075 // partword.cmpxchg.failure:
1076 Builder.SetInsertPoint(FailureBB);
1077 // Upon failure, verify that the masked-out part of the loaded value
1078 // has been modified. If it didn't, abort the cmpxchg, since the
1079 // masked-in part must've.
1080 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1081 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1082 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1083
1084 // Add the second value to the phi from above
1085 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1086
1087 // partword.cmpxchg.end:
1088 Builder.SetInsertPoint(CI);
1089
1090 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1091 Value *Res = PoisonValue::get(CI->getType());
1092 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1093 Res = Builder.CreateInsertValue(Res, Success, 1);
1094
1095 CI->replaceAllUsesWith(Res);
1096 CI->eraseFromParent();
1097 return true;
1098}
1099
1100void AtomicExpandImpl::expandAtomicOpToLLSC(
1101 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1102 AtomicOrdering MemOpOrder,
1103 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1104 ReplacementIRBuilder Builder(I, *DL);
1105 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1106 MemOpOrder, PerformOp);
1107
1108 I->replaceAllUsesWith(Loaded);
1109 I->eraseFromParent();
1110}
1111
1112void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1113 ReplacementIRBuilder Builder(AI, *DL);
1114
1115 PartwordMaskValues PMV =
1116 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1117 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1118
1119 // The value operand must be sign-extended for signed min/max so that the
1120 // target's signed comparison instructions can be used. Otherwise, just
1121 // zero-ext.
1122 Instruction::CastOps CastOp = Instruction::ZExt;
1123 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1124 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1125 CastOp = Instruction::SExt;
1126
1127 Value *ValOperand_Shifted = Builder.CreateShl(
1128 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1129 PMV.ShiftAmt, "ValOperand_Shifted");
1130 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1131 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1132 AI->getOrdering());
1133 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1134 AI->replaceAllUsesWith(FinalOldResult);
1135 AI->eraseFromParent();
1136}
1137
1138void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1139 AtomicCmpXchgInst *CI) {
1140 ReplacementIRBuilder Builder(CI, *DL);
1141
1142 PartwordMaskValues PMV = createMaskInstrs(
1143 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1144 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1145
1146 Value *CmpVal_Shifted = Builder.CreateShl(
1147 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1148 "CmpVal_Shifted");
1149 Value *NewVal_Shifted = Builder.CreateShl(
1150 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1151 "NewVal_Shifted");
1152 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1153 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1154 CI->getMergedOrdering());
1155 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1156 Value *Res = PoisonValue::get(CI->getType());
1157 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1158 Value *Success = Builder.CreateICmpEQ(
1159 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1160 Res = Builder.CreateInsertValue(Res, Success, 1);
1161
1162 CI->replaceAllUsesWith(Res);
1163 CI->eraseFromParent();
1164}
1165
1166Value *AtomicExpandImpl::insertRMWLLSCLoop(
1167 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1168 AtomicOrdering MemOpOrder,
1169 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1170 LLVMContext &Ctx = Builder.getContext();
1171 BasicBlock *BB = Builder.GetInsertBlock();
1172 Function *F = BB->getParent();
1173
1174 assert(AddrAlign >=
1175 F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1176 "Expected at least natural alignment at this point.");
1177
1178 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1179 //
1180 // The standard expansion we produce is:
1181 // [...]
1182 // atomicrmw.start:
1183 // %loaded = @load.linked(%addr)
1184 // %new = some_op iN %loaded, %incr
1185 // %stored = @store_conditional(%new, %addr)
1186 // %try_again = icmp i32 ne %stored, 0
1187 // br i1 %try_again, label %loop, label %atomicrmw.end
1188 // atomicrmw.end:
1189 // [...]
1190 BasicBlock *ExitBB =
1191 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1192 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1193
1194 // The split call above "helpfully" added a branch at the end of BB (to the
1195 // wrong place).
1196 std::prev(BB->end())->eraseFromParent();
1197 Builder.SetInsertPoint(BB);
1198 Builder.CreateBr(LoopBB);
1199
1200 // Start the main loop block now that we've taken care of the preliminaries.
1201 Builder.SetInsertPoint(LoopBB);
1202 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1203
1204 Value *NewVal = PerformOp(Builder, Loaded);
1205
1206 Value *StoreSuccess =
1207 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1208 Value *TryAgain = Builder.CreateICmpNE(
1209 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1210 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1211
1212 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1213 return Loaded;
1214}
1215
1216/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1217/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1218/// IR. As a migration step, we convert back to what used to be the standard
1219/// way to represent a pointer cmpxchg so that we can update backends one by
1220/// one.
1221AtomicCmpXchgInst *
1222AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1223 auto *M = CI->getModule();
1224 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1225 M->getDataLayout());
1226
1227 ReplacementIRBuilder Builder(CI, *DL);
1228
1229 Value *Addr = CI->getPointerOperand();
1230
1231 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1232 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1233
1234 auto *NewCI = Builder.CreateAtomicCmpXchg(
1235 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1236 CI->getFailureOrdering(), CI->getSyncScopeID());
1237 NewCI->setVolatile(CI->isVolatile());
1238 NewCI->setWeak(CI->isWeak());
1239 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1240
1241 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1242 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1243
1244 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1245
1246 Value *Res = PoisonValue::get(CI->getType());
1247 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1248 Res = Builder.CreateInsertValue(Res, Succ, 1);
1249
1250 CI->replaceAllUsesWith(Res);
1251 CI->eraseFromParent();
1252 return NewCI;
1253}
1254
1255bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1256 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1257 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1258 Value *Addr = CI->getPointerOperand();
1259 BasicBlock *BB = CI->getParent();
1260 Function *F = BB->getParent();
1261 LLVMContext &Ctx = F->getContext();
1262 // If shouldInsertFencesForAtomic() returns true, then the target does not
1263 // want to deal with memory orders, and emitLeading/TrailingFence should take
1264 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1265 // should preserve the ordering.
1266 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1267 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1268 ? AtomicOrdering::Monotonic
1269 : CI->getMergedOrdering();
1270
1271 // In implementations which use a barrier to achieve release semantics, we can
1272 // delay emitting this barrier until we know a store is actually going to be
1273 // attempted. The cost of this delay is that we need 2 copies of the block
1274 // emitting the load-linked, affecting code size.
1275 //
1276 // Ideally, this logic would be unconditional except for the minsize check
1277 // since in other cases the extra blocks naturally collapse down to the
1278 // minimal loop. Unfortunately, this puts too much stress on later
1279 // optimisations so we avoid emitting the extra logic in those cases too.
1280 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1281 SuccessOrder != AtomicOrdering::Monotonic &&
1282 SuccessOrder != AtomicOrdering::Acquire &&
1283 !F->hasMinSize();
1284
1285 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1286 // do it even on minsize.
1287 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1288
1289 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1290 //
1291 // The full expansion we produce is:
1292 // [...]
1293 // %aligned.addr = ...
1294 // cmpxchg.start:
1295 // %unreleasedload = @load.linked(%aligned.addr)
1296 // %unreleasedload.extract = extract value from %unreleasedload
1297 // %should_store = icmp eq %unreleasedload.extract, %desired
1298 // br i1 %should_store, label %cmpxchg.releasingstore,
1299 // label %cmpxchg.nostore
1300 // cmpxchg.releasingstore:
1301 // fence?
1302 // br label cmpxchg.trystore
1303 // cmpxchg.trystore:
1304 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1305 // [%releasedload, %cmpxchg.releasedload]
1306 // %updated.new = insert %new into %loaded.trystore
1307 // %stored = @store_conditional(%updated.new, %aligned.addr)
1308 // %success = icmp eq i32 %stored, 0
1309 // br i1 %success, label %cmpxchg.success,
1310 // label %cmpxchg.releasedload/%cmpxchg.failure
1311 // cmpxchg.releasedload:
1312 // %releasedload = @load.linked(%aligned.addr)
1313 // %releasedload.extract = extract value from %releasedload
1314 // %should_store = icmp eq %releasedload.extract, %desired
1315 // br i1 %should_store, label %cmpxchg.trystore,
1316 // label %cmpxchg.failure
1317 // cmpxchg.success:
1318 // fence?
1319 // br label %cmpxchg.end
1320 // cmpxchg.nostore:
1321 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1322 // [%releasedload,
1323 // %cmpxchg.releasedload/%cmpxchg.trystore]
1324 // @load_linked_fail_balance()?
1325 // br label %cmpxchg.failure
1326 // cmpxchg.failure:
1327 // fence?
1328 // br label %cmpxchg.end
1329 // cmpxchg.end:
1330 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1331 // [%loaded.trystore, %cmpxchg.trystore]
1332 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1333 // %loaded = extract value from %loaded.exit
1334 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1335 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1336 // [...]
1337 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1338 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1339 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1340 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1341 auto ReleasedLoadBB =
1342 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1343 auto TryStoreBB =
1344 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1345 auto ReleasingStoreBB =
1346 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1347 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1348
1349 ReplacementIRBuilder Builder(CI, *DL);
1350
1351 // The split call above "helpfully" added a branch at the end of BB (to the
1352 // wrong place), but we might want a fence too. It's easiest to just remove
1353 // the branch entirely.
1354 std::prev(BB->end())->eraseFromParent();
1355 Builder.SetInsertPoint(BB);
1356 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1357 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1358
1359 PartwordMaskValues PMV =
1360 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1361 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1362 Builder.CreateBr(StartBB);
1363
1364 // Start the main loop block now that we've taken care of the preliminaries.
1365 Builder.SetInsertPoint(StartBB);
1366 Value *UnreleasedLoad =
1367 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1368 Value *UnreleasedLoadExtract =
1369 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1370 Value *ShouldStore = Builder.CreateICmpEQ(
1371 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1372
1373 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1374 // jump straight past that fence instruction (if it exists).
1375 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1376
1377 Builder.SetInsertPoint(ReleasingStoreBB);
1378 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1379 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1380 Builder.CreateBr(TryStoreBB);
1381
1382 Builder.SetInsertPoint(TryStoreBB);
1383 PHINode *LoadedTryStore =
1384 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1385 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1386 Value *NewValueInsert =
1387 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1388 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1389 PMV.AlignedAddr, MemOpOrder);
1390 StoreSuccess = Builder.CreateICmpEQ(
1391 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1392 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1393 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1394 CI->isWeak() ? FailureBB : RetryBB);
1395
1396 Builder.SetInsertPoint(ReleasedLoadBB);
1397 Value *SecondLoad;
1398 if (HasReleasedLoadBB) {
1399 SecondLoad =
1400 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1401 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1402 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1403 CI->getCompareOperand(), "should_store");
1404
1405 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1406 // jump straight past that fence instruction (if it exists).
1407 Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1408 // Update PHI node in TryStoreBB.
1409 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1410 } else
1411 Builder.CreateUnreachable();
1412
1413 // Make sure later instructions don't get reordered with a fence if
1414 // necessary.
1415 Builder.SetInsertPoint(SuccessBB);
1416 if (ShouldInsertFencesForAtomic ||
1417 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1418 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1419 Builder.CreateBr(ExitBB);
1420
1421 Builder.SetInsertPoint(NoStoreBB);
1422 PHINode *LoadedNoStore =
1423 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1424 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1425 if (HasReleasedLoadBB)
1426 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1427
1428 // In the failing case, where we don't execute the store-conditional, the
1429 // target might want to balance out the load-linked with a dedicated
1430 // instruction (e.g., on ARM, clearing the exclusive monitor).
1431 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1432 Builder.CreateBr(FailureBB);
1433
1434 Builder.SetInsertPoint(FailureBB);
1435 PHINode *LoadedFailure =
1436 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1437 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1438 if (CI->isWeak())
1439 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1440 if (ShouldInsertFencesForAtomic)
1441 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1442 Builder.CreateBr(ExitBB);
1443
1444 // Finally, we have control-flow based knowledge of whether the cmpxchg
1445 // succeeded or not. We expose this to later passes by converting any
1446 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1447 // PHI.
1448 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1449 PHINode *LoadedExit =
1450 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1451 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1452 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1453 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1454 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1455 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1456
1457 // This is the "exit value" from the cmpxchg expansion. It may be of
1458 // a type wider than the one in the cmpxchg instruction.
1459 Value *LoadedFull = LoadedExit;
1460
1461 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1462 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1463
1464 // Look for any users of the cmpxchg that are just comparing the loaded value
1465 // against the desired one, and replace them with the CFG-derived version.
1466 SmallVector<ExtractValueInst *, 2> PrunedInsts;
1467 for (auto *User : CI->users()) {
1468 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1469 if (!EV)
1470 continue;
1471
1472 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1473 "weird extraction from { iN, i1 }");
1474
1475 if (EV->getIndices()[0] == 0)
1476 EV->replaceAllUsesWith(Loaded);
1477 else
1478 EV->replaceAllUsesWith(Success);
1479
1480 PrunedInsts.push_back(EV);
1481 }
1482
1483 // We can remove the instructions now we're no longer iterating through them.
1484 for (auto *EV : PrunedInsts)
1485 EV->eraseFromParent();
1486
1487 if (!CI->use_empty()) {
1488 // Some use of the full struct return that we don't understand has happened,
1489 // so we've got to reconstruct it properly.
1490 Value *Res;
1491 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1492 Res = Builder.CreateInsertValue(Res, Success, 1);
1493
1494 CI->replaceAllUsesWith(Res);
1495 }
1496
1497 CI->eraseFromParent();
1498 return true;
1499}
1500
1501bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1502 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1503 if (!C)
1504 return false;
1505
1506 AtomicRMWInst::BinOp Op = RMWI->getOperation();
1507 switch (Op) {
1508 case AtomicRMWInst::Add:
1509 case AtomicRMWInst::Sub:
1510 case AtomicRMWInst::Or:
1511 case AtomicRMWInst::Xor:
1512 return C->isZero();
1513 case AtomicRMWInst::And:
1514 return C->isMinusOne();
1515 // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1516 default:
1517 return false;
1518 }
1519}
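// Examples (illustrative): "atomicrmw add ptr %p, i32 0",
// "atomicrmw or ptr %p, i32 0" and "atomicrmw and ptr %p, i32 -1" all leave
// memory unchanged, so only the ordering side effect and the loaded value
// matter; simplifyIdempotentRMW below lets the target turn such operations
// into a suitably fenced atomic load.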
1520
1521bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1522 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1523 tryExpandAtomicLoad(ResultingLoad);
1524 return true;
1525 }
1526 return false;
1527}
1528
1529Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1530 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1531 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1532 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1533 CreateCmpXchgInstFun CreateCmpXchg) {
1534 LLVMContext &Ctx = Builder.getContext();
1535 BasicBlock *BB = Builder.GetInsertBlock();
1536 Function *F = BB->getParent();
1537
1538 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1539 //
1540 // The standard expansion we produce is:
1541 // [...]
1542 // %init_loaded = load atomic iN* %addr
1543 // br label %loop
1544 // loop:
1545 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1546 // %new = some_op iN %loaded, %incr
1547 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1548 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1549 // %success = extractvalue { iN, i1 } %pair, 1
1550 // br i1 %success, label %atomicrmw.end, label %loop
1551 // atomicrmw.end:
1552 // [...]
1553 BasicBlock *ExitBB =
1554 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1555 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1556
1557 // The split call above "helpfully" added a branch at the end of BB (to the
1558 // wrong place), but we want a load. It's easiest to just remove
1559 // the branch entirely.
1560 std::prev(BB->end())->eraseFromParent();
1561 Builder.SetInsertPoint(BB);
1562 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1563 Builder.CreateBr(LoopBB);
1564
1565 // Start the main loop block now that we've taken care of the preliminaries.
1566 Builder.SetInsertPoint(LoopBB);
1567 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1568 Loaded->addIncoming(InitLoaded, BB);
1569
1570 Value *NewVal = PerformOp(Builder, Loaded);
1571
1572 Value *NewLoaded = nullptr;
1573 Value *Success = nullptr;
1574
1575 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1576 MemOpOrder == AtomicOrdering::Unordered
1577 ? AtomicOrdering::Monotonic
1578 : MemOpOrder,
1579 SSID, Success, NewLoaded);
1580 assert(Success && NewLoaded);
1581
1582 Loaded->addIncoming(NewLoaded, LoopBB);
1583
1584 Builder.CreateCondBr(Success, ExitBB, LoopBB);
1585
1586 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1587 return NewLoaded;
1588}
1589
1590bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1591 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1592 unsigned ValueSize = getAtomicOpSize(CI);
1593
1594 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1595 default:
1596 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1597 case TargetLoweringBase::AtomicExpansionKind::None:
1598 if (ValueSize < MinCASSize)
1599 return expandPartwordCmpXchg(CI);
1600 return false;
1601 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1602 return expandAtomicCmpXchg(CI);
1603 }
1604 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1605 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1606 return true;
1607 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1608 return lowerAtomicCmpXchgInst(CI);
1609 }
1610}
1611
1612// Note: This function is exposed externally by AtomicExpandUtils.h
1613bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1614 CreateCmpXchgInstFun CreateCmpXchg) {
1615 ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
1616 Builder.setIsFPConstrained(
1617 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1618
1619 // FIXME: If FP exceptions are observable, we should force them off for the
1620 // loop for the FP atomics.
1621 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1622 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1623 AI->getOrdering(), AI->getSyncScopeID(),
1624 [&](IRBuilderBase &Builder, Value *Loaded) {
1625 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1626 AI->getValOperand());
1627 },
1628 CreateCmpXchg);
1629
1630 AI->replaceAllUsesWith(Loaded);
1631 AI->eraseFromParent();
1632 return true;
1633}
1634
1635// In order to use one of the sized library calls such as
1636// __atomic_fetch_add_4, the alignment must be sufficient, the size
1637// must be one of the potentially-specialized sizes, and the value
1638// type must actually exist in C on the target (otherwise, the
1639// function wouldn't be defined).
1640static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1641 const DataLayout &DL) {
1642 // TODO: "LargestSize" is an approximation for "largest type that
1643 // you can express in C". It seems to be the case that int128 is
1644 // supported on all 64-bit platforms, otherwise only up to 64-bit
1645 // integers are supported. If we get this wrong, then we'll try to
1646 // call a sized libcall that doesn't actually exist. There should
1647 // really be some more reliable way in LLVM of determining integer
1648 // sizes which are valid in the target's C ABI...
1649 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1650 return Alignment >= Size &&
1651 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1652 Size <= LargestSize;
1653}
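// Editorial sketch (not part of the original source): assuming a 64-bit
// DataLayout where getLargestLegalIntTypeSizeInBits() >= 64, so LargestSize
// is 16, the predicate above behaves as follows:
//
//   canUseSizedAtomicCall(4, Align(4), DL);   // true:  __atomic_*_4 is usable
//   canUseSizedAtomicCall(8, Align(4), DL);   // false: under-aligned, fall back to the generic __atomic_* form
//   canUseSizedAtomicCall(16, Align(16), DL); // true:  __atomic_*_16 is usable
//   canUseSizedAtomicCall(3, Align(4), DL);   // false: 3 is not a specialized size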
1654
1655void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1656 static const RTLIB::Libcall Libcalls[6] = {
1657 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1658 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1659 unsigned Size = getAtomicOpSize(I);
1660
1661 bool expanded = expandAtomicOpToLibcall(
1662 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1663 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1664 if (!expanded)
1665 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
1666}
1667
1668void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1669 static const RTLIB::Libcall Libcalls[6] = {
1670 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1671 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1672 unsigned Size = getAtomicOpSize(I);
1673
1674 bool expanded = expandAtomicOpToLibcall(
1675 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1676 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1677 if (!expanded)
1678 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
1679}
1680
1681void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1682 static const RTLIB::Libcall Libcalls[6] = {
1683 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1684 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1685 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1686 unsigned Size = getAtomicOpSize(I);
1687
1688 bool expanded = expandAtomicOpToLibcall(
1689 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1690 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1691 Libcalls);
1692 if (!expanded)
1693 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
1694}
1695
1696static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1697 static const RTLIB::Libcall LibcallsXchg[6] = {
1698 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1699 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1700 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1701 static const RTLIB::Libcall LibcallsAdd[6] = {
1702 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1703 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1704 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1705 static const RTLIB::Libcall LibcallsSub[6] = {
1706 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1707 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1708 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1709 static const RTLIB::Libcall LibcallsAnd[6] = {
1710 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1711 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1712 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1713 static const RTLIB::Libcall LibcallsOr[6] = {
1714 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1715 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1716 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1717 static const RTLIB::Libcall LibcallsXor[6] = {
1718 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1719 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1720 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1721 static const RTLIB::Libcall LibcallsNand[6] = {
1722 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1723 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1724 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1725
1726 switch (Op) {
1727 case AtomicRMWInst::BAD_BINOP:
1728 llvm_unreachable("Should not have BAD_BINOP.");
1729 case AtomicRMWInst::Xchg:
1730 return ArrayRef(LibcallsXchg);
1731 case AtomicRMWInst::Add:
1732 return ArrayRef(LibcallsAdd);
1733 case AtomicRMWInst::Sub:
1734 return ArrayRef(LibcallsSub);
1735 case AtomicRMWInst::And:
1736 return ArrayRef(LibcallsAnd);
1737 case AtomicRMWInst::Or:
1738 return ArrayRef(LibcallsOr);
1739 case AtomicRMWInst::Xor:
1740 return ArrayRef(LibcallsXor);
1741 case AtomicRMWInst::Nand:
1742 return ArrayRef(LibcallsNand);
1743 case AtomicRMWInst::Max:
1744 case AtomicRMWInst::Min:
1745 case AtomicRMWInst::UMax:
1746 case AtomicRMWInst::UMin:
1747 case AtomicRMWInst::FMax:
1748 case AtomicRMWInst::FMin:
1749 case AtomicRMWInst::FAdd:
1750 case AtomicRMWInst::FSub:
1751 case AtomicRMWInst::UIncWrap:
1752 case AtomicRMWInst::UDecWrap:
1753 // No atomic libcalls are available for max/min/umax/umin.
1754 return {};
1755 }
1756 llvm_unreachable("Unexpected AtomicRMW operation.");
1757}
1758
1759void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1760 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1761
1762 unsigned Size = getAtomicOpSize(I);
1763
1764 bool Success = false;
1765 if (!Libcalls.empty())
1766 Success = expandAtomicOpToLibcall(
1767 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1768 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1769
1770 // The expansion failed: either there were no libcalls at all for
1771 // the operation (min/max), or there were only size-specialized
1772 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1773 // CAS libcall, via a CAS loop, instead.
1774 if (!Success) {
1775 expandAtomicRMWToCmpXchg(
1776 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1777 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1778 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
1779 // Create the CAS instruction normally...
1780 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1781 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1782 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1783 Success = Builder.CreateExtractValue(Pair, 1, "success");
1784 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1785
1786 // ...and then expand the CAS into a libcall.
1787 expandAtomicCASToLibcall(Pair);
1788 });
1789 }
1790}
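// Editorial sketch (not part of the original source) of the fallback path
// above: an operation with no __atomic_fetch_* entry in GetRMWLibcall, e.g.
//
//   %old = atomicrmw max ptr %p, i32 %v seq_cst
//
// is first rewritten into a compare-exchange loop by expandAtomicRMWToCmpXchg,
// and the cmpxchg created inside that loop is then itself lowered by
// expandAtomicCASToLibcall to __atomic_compare_exchange_4 (or to the generic
// __atomic_compare_exchange when the sized form is not usable).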
1791
1792// A helper routine for the above expandAtomic*ToLibcall functions.
1793//
1794// 'Libcalls' contains an array of enum values for the particular
1795// ATOMIC libcalls to be emitted. All of the other arguments besides
1796// 'I' are extracted from the Instruction subclass by the
1797// caller. Depending on the particular call, some will be null.
1798bool AtomicExpandImpl::expandAtomicOpToLibcall(
1799 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1800 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1801 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1802 assert(Libcalls.size() == 6);
1803
1804 LLVMContext &Ctx = I->getContext();
1805 Module *M = I->getModule();
1806 const DataLayout &DL = M->getDataLayout();
1807 IRBuilder<> Builder(I);
1808 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1809
1810 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1811 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1812
1813 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1814
1815 // TODO: the "order" argument type is "int", not int32. So
1816 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1817 ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1818 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1819 Constant *OrderingVal =
1820 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1821 Constant *Ordering2Val = nullptr;
1822 if (CASExpected) {
1823 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1824 Ordering2Val =
1825 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1826 }
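// Editorial note (not part of the original source): toCABI maps LLVM's
// AtomicOrdering onto the integer values used by the __atomic_* ABI, which
// match the compiler's __ATOMIC_* macros, e.g.
//
//   (int)toCABI(AtomicOrdering::Monotonic);              // 0 == __ATOMIC_RELAXED
//   (int)toCABI(AtomicOrdering::Acquire);                // 2 == __ATOMIC_ACQUIRE
//   (int)toCABI(AtomicOrdering::SequentiallyConsistent); // 5 == __ATOMIC_SEQ_CST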
1827 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1828
1829 RTLIB::Libcall RTLibType;
1830 if (UseSizedLibcall) {
1831 switch (Size) {
1832 case 1:
1833 RTLibType = Libcalls[1];
1834 break;
1835 case 2:
1836 RTLibType = Libcalls[2];
1837 break;
1838 case 4:
1839 RTLibType = Libcalls[3];
1840 break;
1841 case 8:
1842 RTLibType = Libcalls[4];
1843 break;
1844 case 16:
1845 RTLibType = Libcalls[5];
1846 break;
1847 }
1848 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1849 RTLibType = Libcalls[0];
1850 } else {
1851 // Can't use sized function, and there's no generic for this
1852 // operation, so give up.
1853 return false;
1854 }
1855
1856 if (!TLI->getLibcallName(RTLibType)) {
1857 // This target does not implement the requested atomic libcall so give up.
1858 return false;
1859 }
1860
1861 // Build up the function call. There are two kinds. First, the sized
1862 // variants. These calls are going to be one of the following (with
1863 // N=1,2,4,8,16):
1864 // iN __atomic_load_N(iN *ptr, int ordering)
1865 // void __atomic_store_N(iN *ptr, iN val, int ordering)
1866 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1867 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1868 // int success_order, int failure_order)
1869 //
1870 // Note that these functions can be used for non-integer atomic
1871 // operations; the values just need to be bitcast to integers on the
1872 // way in and out.
1873 //
1874 // And, then, the generic variants. They look like the following:
1875 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1876 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1877 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1878 // int ordering)
1879 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1880 // void *desired, int success_order,
1881 // int failure_order)
1882 //
1883 // The different signatures are built up depending on the
1884 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1885 // variables.
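// Editorial example (not part of the original source): with the sized form, a
// naturally aligned 4-byte exchange such as
//
//   %old = atomicrmw xchg ptr %p, i32 %v seq_cst
//
// becomes
//
//   %old = call i32 @__atomic_exchange_4(ptr %p, i32 %v, i32 5)
//
// while an operand that fails canUseSizedAtomicCall is routed through the
// generic form, with the value and result passed indirectly via stack slots
// (the %val.addr/%ret.addr names here are illustrative):
//
//   call void @__atomic_exchange(i64 <size>, ptr %p, ptr %val.addr,
//                                ptr %ret.addr, i32 5)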
1886
1887 AllocaInst *AllocaCASExpected = nullptr;
1888 AllocaInst *AllocaValue = nullptr;
1889 AllocaInst *AllocaResult = nullptr;
1890
1891 Type *ResultTy;
1892 SmallVector<Value *, 6> Args;
1893 AttributeList Attr;
1894
1895 // 'size' argument.
1896 if (!UseSizedLibcall) {
1897 // Note, getIntPtrType is assumed equivalent to size_t.
1898 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1899 }
1900
1901 // 'ptr' argument.
1902 // note: This assumes all address spaces share a common libfunc
1903 // implementation and that addresses are convertible. For systems without
1904 // that property, we'd need to extend this mechanism to support AS-specific
1905 // families of atomic intrinsics.
1906 Value *PtrVal = PointerOperand;
1907 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
1908 Args.push_back(PtrVal);
1909
1910 // 'expected' argument, if present.
1911 if (CASExpected) {
1912 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1913 AllocaCASExpected->setAlignment(AllocaAlignment);
1914 Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
1915 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1916 Args.push_back(AllocaCASExpected);
1917 }
1918
1919 // 'val' argument ('desired' for cas), if present.
1920 if (ValueOperand) {
1921 if (UseSizedLibcall) {
1922 Value *IntValue =
1923 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1924 Args.push_back(IntValue);
1925 } else {
1926 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1927 AllocaValue->setAlignment(AllocaAlignment);
1928 Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
1929 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1930 Args.push_back(AllocaValue);
1931 }
1932 }
1933
1934 // 'ret' argument.
1935 if (!CASExpected && HasResult && !UseSizedLibcall) {
1936 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1937 AllocaResult->setAlignment(AllocaAlignment);
1938 Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
1939 Args.push_back(AllocaResult);
1940 }
1941
1942 // 'ordering' ('success_order' for cas) argument.
1943 Args.push_back(OrderingVal);
1944
1945 // 'failure_order' argument, if present.
1946 if (Ordering2Val)
1947 Args.push_back(Ordering2Val);
1948
1949 // Now, the return type.
1950 if (CASExpected) {
1951 ResultTy = Type::getInt1Ty(Ctx);
1952 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
1953 } else if (HasResult && UseSizedLibcall)
1954 ResultTy = SizedIntTy;
1955 else
1956 ResultTy = Type::getVoidTy(Ctx);
1957
1958 // Done with setting up arguments and return types, create the call:
1959 SmallVector<Type *, 6> ArgTys;
1960 for (Value *Arg : Args)
1961 ArgTys.push_back(Arg->getType());
1962 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1963 FunctionCallee LibcallFn =
1964 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1965 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1966 Call->setAttributes(Attr);
1967 Value *Result = Call;
1968
1969 // And then, extract the results...
1970 if (ValueOperand && !UseSizedLibcall)
1971 Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);
1972
1973 if (CASExpected) {
1974 // The final result from the CAS is {load of 'expected' alloca, bool result
1975 // from call}
1976 Type *FinalResultTy = I->getType();
1977 Value *V = PoisonValue::get(FinalResultTy);
1978 Value *ExpectedOut = Builder.CreateAlignedLoad(
1979 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
1980 Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
1981 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1982 V = Builder.CreateInsertValue(V, Result, 1);
1983 I->replaceAllUsesWith(V);
1984 } else if (HasResult) {
1985 Value *V;
1986 if (UseSizedLibcall)
1987 V = Builder.CreateBitOrPointerCast(Result, I->getType());
1988 else {
1989 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
1990 AllocaAlignment);
1991 Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
1992 }
1993 I->replaceAllUsesWith(V);
1994 }
1995 I->eraseFromParent();
1996 return true;
1997}