1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target-specific instructions which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
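// For example, an "atomicrmw add" that a target cannot lower natively may be
// rewritten here as a call to __atomic_fetch_add, as a loop around a
// load-linked/store-conditional pair, or as a compare-and-swap loop, depending
// on what TargetLowering reports for that operation and operand size.
//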
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/STLFunctionalExtras.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/Analysis/InstSimplifyFolder.h"
21#include "llvm/Analysis/OptimizationRemarkEmitter.h"
22#include "llvm/CodeGen/AtomicExpand.h"
23#include "llvm/CodeGen/AtomicExpandUtils.h"
24#include "llvm/CodeGen/RuntimeLibcalls.h"
25#include "llvm/CodeGen/TargetLowering.h"
26#include "llvm/CodeGen/TargetPassConfig.h"
27#include "llvm/CodeGen/TargetSubtargetInfo.h"
28#include "llvm/CodeGen/ValueTypes.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constant.h"
32#include "llvm/IR/Constants.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/InstIterator.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/Module.h"
41#include "llvm/IR/Type.h"
42#include "llvm/IR/User.h"
43#include "llvm/IR/Value.h"
44#include "llvm/InitializePasses.h"
45#include "llvm/Pass.h"
46#include "llvm/Support/AtomicOrdering.h"
47#include "llvm/Support/Casting.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Support/raw_ostream.h"
51#include "llvm/Target/TargetMachine.h"
52#include "llvm/Transforms/Utils/LowerAtomic.h"
53#include <cassert>
54#include <cstdint>
55#include <iterator>
56
57using namespace llvm;
58
59#define DEBUG_TYPE "atomic-expand"
60
61namespace {
62
63class AtomicExpandImpl {
64 const TargetLowering *TLI = nullptr;
65 const DataLayout *DL = nullptr;
66
67private:
68 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
69 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
70 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
71 bool tryExpandAtomicLoad(LoadInst *LI);
72 bool expandAtomicLoadToLL(LoadInst *LI);
73 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
74 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
75 bool tryExpandAtomicStore(StoreInst *SI);
76 void expandAtomicStore(StoreInst *SI);
77 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
78 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
79 Value *
80 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
81 Align AddrAlign, AtomicOrdering MemOpOrder,
82 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
83 void expandAtomicOpToLLSC(
84 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
85 AtomicOrdering MemOpOrder,
86 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
87 void expandPartwordAtomicRMW(
88 AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
89 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
90 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
91 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
92 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
93
94 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
95 static Value *insertRMWCmpXchgLoop(
96 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
97 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
98 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
99 CreateCmpXchgInstFun CreateCmpXchg);
100 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
101
102 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
103 bool isIdempotentRMW(AtomicRMWInst *RMWI);
104 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
105
106 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
107 Value *PointerOperand, Value *ValueOperand,
108 Value *CASExpected, AtomicOrdering Ordering,
109 AtomicOrdering Ordering2,
110 ArrayRef<RTLIB::Libcall> Libcalls);
111 void expandAtomicLoadToLibcall(LoadInst *LI);
112 void expandAtomicStoreToLibcall(StoreInst *LI);
113 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
114 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
115
116 friend bool
117 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
118 CreateCmpXchgInstFun CreateCmpXchg);
119
120public:
121 bool run(Function &F, const TargetMachine *TM);
122};
123
124class AtomicExpandLegacy : public FunctionPass {
125public:
126 static char ID; // Pass identification, replacement for typeid
127
128 AtomicExpandLegacy() : FunctionPass(ID) {
129 initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
130 }
131
132 bool runOnFunction(Function &F) override;
133};
134
135// IRBuilder to be used for replacement atomic instructions.
136struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> {
137 // Preserves the DebugLoc from I, and preserves still valid metadata.
138 // Enable StrictFP builder mode when appropriate.
139 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
140 : IRBuilder(I->getContext(), DL) {
141 SetInsertPoint(I);
142 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
143 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
144 this->setIsFPConstrained(true);
145 }
146};
147
148} // end anonymous namespace
149
150char AtomicExpandLegacy::ID = 0;
151
152char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
153
155 "Expand Atomic instructions", false, false)
157INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
158 "Expand Atomic instructions", false, false)
159
160// Helper functions to retrieve the size of atomic instructions.
161static unsigned getAtomicOpSize(LoadInst *LI) {
162 const DataLayout &DL = LI->getModule()->getDataLayout();
163 return DL.getTypeStoreSize(LI->getType());
164}
165
166static unsigned getAtomicOpSize(StoreInst *SI) {
167 const DataLayout &DL = SI->getModule()->getDataLayout();
168 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
169}
170
171static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
172 const DataLayout &DL = RMWI->getModule()->getDataLayout();
173 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
174}
175
176static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
177 const DataLayout &DL = CASI->getModule()->getDataLayout();
178 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
179}
180
181// Determine if a particular atomic operation has a supported size,
182// and is of appropriate alignment, to be passed through for target
183// lowering. (Versus turning into a __atomic libcall)
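// For example, with a target whose maximum supported atomic width is 8 bytes,
// a naturally aligned 4-byte operation is passed through, while a 16-byte or
// under-aligned operation is routed to the __atomic_* library calls instead.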
184template <typename Inst>
185static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
186 unsigned Size = getAtomicOpSize(I);
187 Align Alignment = I->getAlign();
188 return Alignment >= Size &&
189 Size <= (unsigned)TLI->getMaxAtomicSizeInBitsSupported() / 8;
190}
191
192bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
193 const auto *Subtarget = TM->getSubtargetImpl(F);
194 if (!Subtarget->enableAtomicExpand())
195 return false;
196 TLI = Subtarget->getTargetLowering();
197 DL = &F.getParent()->getDataLayout();
198
199 SmallVector<Instruction *, 1> AtomicInsts;
200
201 // Changing control-flow while iterating through it is a bad idea, so gather a
202 // list of all atomic instructions before we start.
203 for (Instruction &I : instructions(F))
204 if (I.isAtomic() && !isa<FenceInst>(&I))
205 AtomicInsts.push_back(&I);
206
207 bool MadeChange = false;
208 for (auto *I : AtomicInsts) {
209 auto LI = dyn_cast<LoadInst>(I);
210 auto SI = dyn_cast<StoreInst>(I);
211 auto RMWI = dyn_cast<AtomicRMWInst>(I);
212 auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
213 assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
214
215 // If the Size/Alignment is not supported, replace with a libcall.
216 if (LI) {
217 if (!atomicSizeSupported(TLI, LI)) {
218 expandAtomicLoadToLibcall(LI);
219 MadeChange = true;
220 continue;
221 }
222 } else if (SI) {
223 if (!atomicSizeSupported(TLI, SI)) {
224 expandAtomicStoreToLibcall(SI);
225 MadeChange = true;
226 continue;
227 }
228 } else if (RMWI) {
229 if (!atomicSizeSupported(TLI, RMWI)) {
230 expandAtomicRMWToLibcall(RMWI);
231 MadeChange = true;
232 continue;
233 }
234 } else if (CASI) {
235 if (!atomicSizeSupported(TLI, CASI)) {
236 expandAtomicCASToLibcall(CASI);
237 MadeChange = true;
238 continue;
239 }
240 }
241
242 if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
243 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
244 I = LI = convertAtomicLoadToIntegerType(LI);
245 MadeChange = true;
246 } else if (SI &&
247 TLI->shouldCastAtomicStoreInIR(SI) ==
248 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
249 I = SI = convertAtomicStoreToIntegerType(SI);
250 MadeChange = true;
251 } else if (RMWI &&
252 TLI->shouldCastAtomicRMWIInIR(RMWI) ==
253 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
254 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
255 MadeChange = true;
256 } else if (CASI) {
257 // TODO: when we're ready to make the change at the IR level, we can
258 // extend convertCmpXchgToInteger for floating point too.
259 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
260 // TODO: add a TLI hook to control this so that each target can
261 // convert to lowering the original type one at a time.
262 I = CASI = convertCmpXchgToIntegerType(CASI);
263 MadeChange = true;
264 }
265 }
266
267 if (TLI->shouldInsertFencesForAtomic(I)) {
268 auto FenceOrdering = AtomicOrdering::Monotonic;
269 if (LI && isAcquireOrStronger(LI->getOrdering())) {
270 FenceOrdering = LI->getOrdering();
271 LI->setOrdering(AtomicOrdering::Monotonic);
272 } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
273 FenceOrdering = SI->getOrdering();
274 SI->setOrdering(AtomicOrdering::Monotonic);
275 } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
276 isAcquireOrStronger(RMWI->getOrdering()))) {
277 FenceOrdering = RMWI->getOrdering();
278 RMWI->setOrdering(AtomicOrdering::Monotonic);
279 } else if (CASI &&
280 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
281 TargetLoweringBase::AtomicExpansionKind::None &&
282 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
283 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
284 isAcquireOrStronger(CASI->getFailureOrdering()))) {
285 // If a compare and swap is lowered to LL/SC, we can do smarter fence
286 // insertion, with a stronger one on the success path than on the
287 // failure path. As a result, fence insertion is directly done by
288 // expandAtomicCmpXchg in that case.
289 FenceOrdering = CASI->getMergedOrdering();
290 CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
291 CASI->setFailureOrdering(AtomicOrdering::Monotonic);
292 }
293
294 if (FenceOrdering != AtomicOrdering::Monotonic) {
295 MadeChange |= bracketInstWithFences(I, FenceOrdering);
296 }
297 } else if (I->hasAtomicStore() &&
298 TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
299 auto FenceOrdering = AtomicOrdering::Monotonic;
300 if (SI)
301 FenceOrdering = SI->getOrdering();
302 else if (RMWI)
303 FenceOrdering = RMWI->getOrdering();
304 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
305 TargetLoweringBase::AtomicExpansionKind::LLSC)
306 // LLSC is handled in expandAtomicCmpXchg().
307 FenceOrdering = CASI->getSuccessOrdering();
308
309 IRBuilder Builder(I);
310 if (auto TrailingFence =
311 TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
312 TrailingFence->moveAfter(I);
313 MadeChange = true;
314 }
315 }
316
317 if (LI)
318 MadeChange |= tryExpandAtomicLoad(LI);
319 else if (SI)
320 MadeChange |= tryExpandAtomicStore(SI);
321 else if (RMWI) {
322 // There are two different ways of expanding RMW instructions:
323 // - into a load if it is idempotent
324 // - into a Cmpxchg/LL-SC loop otherwise
325 // we try them in that order.
326
327 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
328 MadeChange = true;
329 } else {
330 MadeChange |= tryExpandAtomicRMW(RMWI);
331 }
332 } else if (CASI)
333 MadeChange |= tryExpandAtomicCmpXchg(CASI);
334 }
335 return MadeChange;
336}
337
338bool AtomicExpandLegacy::runOnFunction(Function &F) {
339
340 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
341 if (!TPC)
342 return false;
343 auto *TM = &TPC->getTM<TargetMachine>();
344 AtomicExpandImpl AE;
345 return AE.run(F, TM);
346}
347
348FunctionPass *llvm::createAtomicExpandLegacyPass() {
349 return new AtomicExpandLegacy();
350}
351
352PreservedAnalyses AtomicExpandPass::run(Function &F,
353 FunctionAnalysisManager &AM) {
354 AtomicExpandImpl AE;
355
356 bool Changed = AE.run(F, TM);
357 if (!Changed)
358 return PreservedAnalyses::all();
359
360 return PreservedAnalyses::none();
361}
362
363bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
364 AtomicOrdering Order) {
365 ReplacementIRBuilder Builder(I, *DL);
366
367 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
368
369 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
370 // We have a guard here because not every atomic operation generates a
371 // trailing fence.
372 if (TrailingFence)
373 TrailingFence->moveAfter(I);
374
375 return (LeadingFence || TrailingFence);
376}
377
378/// Get the iX type with the same bitwidth as T.
379IntegerType *
380AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
381 EVT VT = TLI->getMemValueType(DL, T);
382 unsigned BitWidth = VT.getStoreSizeInBits();
383 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
384 return IntegerType::get(T->getContext(), BitWidth);
385}
386
387/// Convert an atomic load of a non-integral type to an integer load of the
388/// equivalent bitwidth. See the function comment on
389/// convertAtomicStoreToIntegerType for background.
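/// For example, "load atomic float, ptr %p" becomes "load atomic i32, ptr %p"
/// followed by a bitcast of the loaded i32 back to float.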
390LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
391 auto *M = LI->getModule();
392 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
393
394 ReplacementIRBuilder Builder(LI, *DL);
395
396 Value *Addr = LI->getPointerOperand();
397
398 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
399 NewLI->setAlignment(LI->getAlign());
400 NewLI->setVolatile(LI->isVolatile());
401 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
402 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
403
404 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
405 LI->replaceAllUsesWith(NewVal);
406 LI->eraseFromParent();
407 return NewLI;
408}
409
410AtomicRMWInst *
411AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
412 auto *M = RMWI->getModule();
413 Type *NewTy =
414 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
415
416 ReplacementIRBuilder Builder(RMWI, *DL);
417
418 Value *Addr = RMWI->getPointerOperand();
419 Value *Val = RMWI->getValOperand();
420 Value *NewVal = Val->getType()->isPointerTy()
421 ? Builder.CreatePtrToInt(Val, NewTy)
422 : Builder.CreateBitCast(Val, NewTy);
423
424 auto *NewRMWI = Builder.CreateAtomicRMW(
425 AtomicRMWInst::Xchg, Addr, NewVal, RMWI->getAlign(), RMWI->getOrdering());
426 NewRMWI->setVolatile(RMWI->isVolatile());
427 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
428
429 Value *NewRVal = RMWI->getType()->isPointerTy()
430 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
431 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
432 RMWI->replaceAllUsesWith(NewRVal);
433 RMWI->eraseFromParent();
434 return NewRMWI;
435}
436
437bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
438 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
439 case TargetLoweringBase::AtomicExpansionKind::None:
440 return false;
441 case TargetLoweringBase::AtomicExpansionKind::LLSC:
442 expandAtomicOpToLLSC(
443 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
444 LI->getOrdering(),
445 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
446 return true;
447 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
448 return expandAtomicLoadToLL(LI);
449 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
450 return expandAtomicLoadToCmpXchg(LI);
451 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
452 LI->setAtomic(AtomicOrdering::NotAtomic);
453 return true;
454 default:
455 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
456 }
457}
458
459bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
460 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
461 case TargetLoweringBase::AtomicExpansionKind::None:
462 return false;
463 case TargetLoweringBase::AtomicExpansionKind::Expand:
464 expandAtomicStore(SI);
465 return true;
466 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
467 SI->setAtomic(AtomicOrdering::NotAtomic);
468 return true;
469 default:
470 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
471 }
472}
473
474bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
475 ReplacementIRBuilder Builder(LI, *DL);
476
477 // On some architectures, load-linked instructions are atomic for larger
478 // sizes than normal loads. For example, the only 64-bit load guaranteed
479 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
480 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
481 LI->getPointerOperand(), LI->getOrdering());
482 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
483
484 LI->replaceAllUsesWith(Val);
485 LI->eraseFromParent();
486
487 return true;
488}
489
490bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
491 ReplacementIRBuilder Builder(LI, *DL);
492 AtomicOrdering Order = LI->getOrdering();
493 if (Order == AtomicOrdering::Unordered)
494 Order = AtomicOrdering::Monotonic;
495
496 Value *Addr = LI->getPointerOperand();
497 Type *Ty = LI->getType();
498 Constant *DummyVal = Constant::getNullValue(Ty);
499
500 Value *Pair = Builder.CreateAtomicCmpXchg(
501 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
502 AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
503 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
504
505 LI->replaceAllUsesWith(Loaded);
506 LI->eraseFromParent();
507
508 return true;
509}
510
511/// Convert an atomic store of a non-integral type to an integer store of the
512/// equivalent bitwidth. We used to not support floating point or vector
513/// atomics in the IR at all. The backends learned to deal with the bitcast
514/// idiom because that was the only way of expressing the notion of an atomic
515/// float or vector store. The long term plan is to teach each backend to
516/// instruction select from the original atomic store, but as a migration
517/// mechanism, we convert back to the old format which the backends understand.
518/// Each backend will need individual work to recognize the new format.
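/// For example, "store atomic double %v, ptr %p" becomes a bitcast of %v to
/// i64 followed by "store atomic i64, ptr %p" with the same ordering,
/// alignment and volatility.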
519StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
520 ReplacementIRBuilder Builder(SI, *DL);
521 auto *M = SI->getModule();
522 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
523 M->getDataLayout());
524 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
525
526 Value *Addr = SI->getPointerOperand();
527
528 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
529 NewSI->setAlignment(SI->getAlign());
530 NewSI->setVolatile(SI->isVolatile());
531 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
532 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
533 SI->eraseFromParent();
534 return NewSI;
535}
536
537void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
538 // This function is only called on atomic stores that are too large to be
539 // atomic if implemented as a native store. So we replace them by an
540 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
541 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
542 // It is the responsibility of the target to only signal expansion via
543 // shouldExpandAtomicRMW in cases where this is required and possible.
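 // For example, a "store atomic i64 ... seq_cst" that is too wide for a plain
 // store on the target becomes "atomicrmw xchg ptr %p, i64 %v seq_cst" whose
 // result is unused; tryExpandAtomicRMW below then lowers that exchange.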
544 ReplacementIRBuilder Builder(SI, *DL);
545 AtomicOrdering Ordering = SI->getOrdering();
546 assert(Ordering != AtomicOrdering::NotAtomic);
547 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
548 ? AtomicOrdering::Monotonic
549 : Ordering;
550 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
551 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
552 SI->getAlign(), RMWOrdering);
553 SI->eraseFromParent();
554
555 // Now we have an appropriate swap instruction, lower it as usual.
556 tryExpandAtomicRMW(AI);
557}
558
559static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
560 Value *Loaded, Value *NewVal, Align AddrAlign,
561 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
562 Value *&Success, Value *&NewLoaded) {
563 Type *OrigTy = NewVal->getType();
564
565 // This code can go away when cmpxchg supports FP and vector types.
566 assert(!OrigTy->isPointerTy());
567 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
568 if (NeedBitcast) {
569 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
570 NewVal = Builder.CreateBitCast(NewVal, IntTy);
571 Loaded = Builder.CreateBitCast(Loaded, IntTy);
572 }
573
574 Value *Pair = Builder.CreateAtomicCmpXchg(
575 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
576 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
577 Success = Builder.CreateExtractValue(Pair, 1, "success");
578 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
579
580 if (NeedBitcast)
581 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
582}
583
584bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
585 LLVMContext &Ctx = AI->getModule()->getContext();
586 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
587 switch (Kind) {
588 case TargetLoweringBase::AtomicExpansionKind::None:
589 return false;
590 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
591 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
592 unsigned ValueSize = getAtomicOpSize(AI);
593 if (ValueSize < MinCASSize) {
594 expandPartwordAtomicRMW(AI,
595 TargetLoweringBase::AtomicExpansionKind::LLSC);
596 } else {
597 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
598 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
599 AI->getValOperand());
600 };
601 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
602 AI->getAlign(), AI->getOrdering(), PerformOp);
603 }
604 return true;
605 }
606 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
607 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
608 unsigned ValueSize = getAtomicOpSize(AI);
609 if (ValueSize < MinCASSize) {
610 expandPartwordAtomicRMW(AI,
611 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
612 } else {
613 SmallVector<StringRef> SSNs;
614 Ctx.getSyncScopeNames(SSNs);
615 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
616 ? "system"
617 : SSNs[AI->getSyncScopeID()];
618 OptimizationRemarkEmitter ORE(AI->getFunction());
619 ORE.emit([&]() {
620 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
621 << "A compare and swap loop was generated for an atomic "
622 << AI->getOperationName(AI->getOperation()) << " operation at "
623 << MemScope << " memory scope";
624 });
625 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
626 }
627 return true;
628 }
629 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
630 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
631 unsigned ValueSize = getAtomicOpSize(AI);
632 if (ValueSize < MinCASSize) {
633 AtomicRMWInst::BinOp Op = AI->getOperation();
634 // Widen And/Or/Xor and give the target another chance at expanding it.
635 if (Op == AtomicRMWInst::And || Op == AtomicRMWInst::Or ||
636 Op == AtomicRMWInst::Xor) {
637 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
638 return true;
639 }
640 }
641 expandAtomicRMWToMaskedIntrinsic(AI);
642 return true;
643 }
644 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
645 TLI->emitBitTestAtomicRMWIntrinsic(AI);
646 return true;
647 }
648 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
649 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
650 return true;
651 }
652 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
653 return lowerAtomicRMWInst(AI);
654 case TargetLoweringBase::AtomicExpansionKind::Expand:
655 TLI->emitExpandAtomicRMW(AI);
656 return true;
657 default:
658 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
659 }
660}
661
662namespace {
663
664struct PartwordMaskValues {
665 // These three fields are guaranteed to be set by createMaskInstrs.
666 Type *WordType = nullptr;
667 Type *ValueType = nullptr;
668 Type *IntValueType = nullptr;
669 Value *AlignedAddr = nullptr;
670 Align AlignedAddrAlignment;
671 // The remaining fields can be null.
672 Value *ShiftAmt = nullptr;
673 Value *Mask = nullptr;
674 Value *Inv_Mask = nullptr;
675};
676
677LLVM_ATTRIBUTE_UNUSED
678raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
679 auto PrintObj = [&O](auto *V) {
680 if (V)
681 O << *V;
682 else
683 O << "nullptr";
684 O << '\n';
685 };
686 O << "PartwordMaskValues {\n";
687 O << " WordType: ";
688 PrintObj(PMV.WordType);
689 O << " ValueType: ";
690 PrintObj(PMV.ValueType);
691 O << " AlignedAddr: ";
692 PrintObj(PMV.AlignedAddr);
693 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
694 O << " ShiftAmt: ";
695 PrintObj(PMV.ShiftAmt);
696 O << " Mask: ";
697 PrintObj(PMV.Mask);
698 O << " Inv_Mask: ";
699 PrintObj(PMV.Inv_Mask);
700 O << "}\n";
701 return O;
702}
703
704} // end anonymous namespace
705
706/// This is a helper function which builds instructions to provide
707/// values necessary for partword atomic operations. It takes an
708/// incoming address, Addr, and ValueType, and constructs the address,
709/// shift-amounts and masks needed to work with a larger value of size
710/// WordSize.
711///
712/// AlignedAddr: Addr rounded down to a multiple of WordSize
713///
714/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
715/// from AlignedAddr for it to have the same value as if
716/// ValueType was loaded from Addr.
717///
718/// Mask: Value to mask with the value loaded from AlignAddr to
719/// include only the part that would've been loaded from Addr.
720///
721/// Inv_Mask: The inverse of Mask.
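///
/// As a concrete illustration: for a 1-byte value whose address has low bits
/// 0b01, with a 4-byte MinWordSize on a little-endian target, AlignedAddr
/// clears the low two address bits, ShiftAmt is 8, Mask is 0xFF00 and
/// Inv_Mask is ~0xFF00.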
722static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
723 Instruction *I, Type *ValueType,
724 Value *Addr, Align AddrAlign,
725 unsigned MinWordSize) {
726 PartwordMaskValues PMV;
727
728 Module *M = I->getModule();
729 LLVMContext &Ctx = M->getContext();
730 const DataLayout &DL = M->getDataLayout();
731 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
732
733 PMV.ValueType = PMV.IntValueType = ValueType;
734 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
735 PMV.IntValueType =
736 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
737
738 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
739 : ValueType;
740 if (PMV.ValueType == PMV.WordType) {
741 PMV.AlignedAddr = Addr;
742 PMV.AlignedAddrAlignment = AddrAlign;
743 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
744 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
745 return PMV;
746 }
747
748 PMV.AlignedAddrAlignment = Align(MinWordSize);
749
750 assert(ValueSize < MinWordSize);
751
752 PointerType *PtrTy = cast<PointerType>(Addr->getType());
753 IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
754 Value *PtrLSB;
755
756 if (AddrAlign < MinWordSize) {
757 PMV.AlignedAddr = Builder.CreateIntrinsic(
758 Intrinsic::ptrmask, {PtrTy, IntTy},
759 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
760 "AlignedAddr");
761
762 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
763 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
764 } else {
765 // If the alignment is high enough, the low bits of the address are known to be 0.
766 PMV.AlignedAddr = Addr;
767 PtrLSB = ConstantInt::getNullValue(IntTy);
768 }
769
770 if (DL.isLittleEndian()) {
771 // turn bytes into bits
772 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
773 } else {
774 // turn bytes into bits, and count from the other side.
775 PMV.ShiftAmt = Builder.CreateShl(
776 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
777 }
778
779 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
780 PMV.Mask = Builder.CreateShl(
781 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
782 "Mask");
783
784 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
785
786 return PMV;
787}
788
789static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
790 const PartwordMaskValues &PMV) {
791 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
792 if (PMV.WordType == PMV.ValueType)
793 return WideWord;
794
795 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
796 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
797 return Builder.CreateBitCast(Trunc, PMV.ValueType);
798}
799
800static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
801 Value *Updated, const PartwordMaskValues &PMV) {
802 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
803 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
804 if (PMV.WordType == PMV.ValueType)
805 return Updated;
806
807 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
808
809 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
810 Value *Shift =
811 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
812 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
813 Value *Or = Builder.CreateOr(And, Shift, "inserted");
814 return Or;
815}
816
817/// Emit IR to implement a masked version of a given atomicrmw
818/// operation. (That is, only the bits under the Mask should be
819/// affected by the operation)
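///
/// For example, a partword Xchg keeps the bits outside Mask from the old word
/// and ORs in the pre-shifted new value; arithmetic ops such as Add and Nand
/// compute on the whole word and are then masked back into place; comparison
/// and FP ops are extracted, computed at the original width, and re-inserted.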
820static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
821 IRBuilderBase &Builder, Value *Loaded,
822 Value *Shifted_Inc, Value *Inc,
823 const PartwordMaskValues &PMV) {
824 // TODO: update to use
825 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
826 // to merge bits from two values without requiring PMV.Inv_Mask.
827 switch (Op) {
828 case AtomicRMWInst::Xchg: {
829 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
830 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
831 return FinalVal;
832 }
833 case AtomicRMWInst::Or:
834 case AtomicRMWInst::Xor:
835 case AtomicRMWInst::And:
836 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
837 case AtomicRMWInst::Add:
838 case AtomicRMWInst::Sub:
839 case AtomicRMWInst::Nand: {
840 // The other arithmetic ops need to be masked into place.
841 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
842 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
843 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
844 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
845 return FinalVal;
846 }
847 case AtomicRMWInst::Max:
848 case AtomicRMWInst::Min:
849 case AtomicRMWInst::UMax:
850 case AtomicRMWInst::UMin:
851 case AtomicRMWInst::FAdd:
852 case AtomicRMWInst::FSub:
853 case AtomicRMWInst::FMin:
854 case AtomicRMWInst::FMax:
855 case AtomicRMWInst::UIncWrap:
856 case AtomicRMWInst::UDecWrap: {
857 // Finally, other ops will operate on the full value, so truncate down to
858 // the original size, and expand out again after doing the
859 // operation. Bitcasts will be inserted for FP values.
860 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
861 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
862 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
863 return FinalVal;
864 }
865 default:
866 llvm_unreachable("Unknown atomic op");
867 }
868}
869
870/// Expand a sub-word atomicrmw operation into an appropriate
871/// word-sized operation.
872///
873/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
874/// way as a typical atomicrmw expansion. The only difference here is
875/// that the operation inside of the loop may operate upon only a
876/// part of the value.
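///
/// For example, an i16 "atomicrmw add" on a target whose narrowest cmpxchg is
/// 32 bits is rewritten as a loop over the containing aligned i32 word: the
/// addend is shifted into place, the addition is performed on the word, and
/// only the bits under the mask are merged back before the store attempt.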
877void AtomicExpandImpl::expandPartwordAtomicRMW(
878 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
879 // Widen And/Or/Xor and give the target another chance at expanding it.
880 AtomicRMWInst::BinOp Op = AI->getOperation();
881 if (Op == AtomicRMWInst::And || Op == AtomicRMWInst::Or ||
882 Op == AtomicRMWInst::Xor) {
883 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
884 return;
885 }
886 AtomicOrdering MemOpOrder = AI->getOrdering();
887 SyncScope::ID SSID = AI->getSyncScopeID();
888
889 ReplacementIRBuilder Builder(AI, *DL);
890
891 PartwordMaskValues PMV =
892 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
893 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
894
895 Value *ValOperand_Shifted = nullptr;
896 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
897 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
898 ValOperand_Shifted =
899 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
900 PMV.ShiftAmt, "ValOperand_Shifted");
901 }
902
903 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
904 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
905 AI->getValOperand(), PMV);
906 };
907
908 Value *OldResult;
909 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
910 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
911 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
912 PerformPartwordOp, createCmpXchgInstFun);
913 } else {
914 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
915 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
916 PMV.AlignedAddrAlignment, MemOpOrder,
917 PerformPartwordOp);
918 }
919
920 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
921 AI->replaceAllUsesWith(FinalOldResult);
922 AI->eraseFromParent();
923}
924
925// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
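// For example, an i8 "atomicrmw or" becomes an i32 "atomicrmw or" on the
// containing word with the operand shifted into position; for "and", the
// inverse mask is ORed into the operand so the untouched bytes are preserved.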
926AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
927 ReplacementIRBuilder Builder(AI, *DL);
928 AtomicRMWInst::BinOp Op = AI->getOperation();
929
930 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
931 Op == AtomicRMWInst::And) &&
932 "Unable to widen operation");
933
934 PartwordMaskValues PMV =
935 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
936 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
937
938 Value *ValOperand_Shifted =
939 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
940 PMV.ShiftAmt, "ValOperand_Shifted");
941
942 Value *NewOperand;
943
944 if (Op == AtomicRMWInst::And)
945 NewOperand =
946 Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
947 else
948 NewOperand = ValOperand_Shifted;
949
950 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
951 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
952 AI->getOrdering(), AI->getSyncScopeID());
953 // TODO: Preserve metadata
954
955 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
956 AI->replaceAllUsesWith(FinalOldResult);
957 AI->eraseFromParent();
958 return NewAI;
959}
960
961bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
962 // The basic idea here is that we're expanding a cmpxchg of a
963 // smaller memory size up to a word-sized cmpxchg. To do this, we
964 // need to add a retry-loop for strong cmpxchg, so that
965 // modifications to other parts of the word don't cause a spurious
966 // failure.
967
968 // This generates code like the following:
969 // [[Setup mask values PMV.*]]
970 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
971 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
972 // %InitLoaded = load i32* %addr
973 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
974 // br partword.cmpxchg.loop
975 // partword.cmpxchg.loop:
976 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
977 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
978 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
979 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
980 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
981 // i32 %FullWord_NewVal success_ordering failure_ordering
982 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
983 // %Success = extractvalue { i32, i1 } %NewCI, 1
984 // br i1 %Success, label %partword.cmpxchg.end,
985 // label %partword.cmpxchg.failure
986 // partword.cmpxchg.failure:
987 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
988 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
989 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
990 // label %partword.cmpxchg.end
991 // partword.cmpxchg.end:
992 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
993 // %FinalOldVal = trunc i32 %tmp1 to i8
994 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
995 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
996
997 Value *Addr = CI->getPointerOperand();
998 Value *Cmp = CI->getCompareOperand();
999 Value *NewVal = CI->getNewValOperand();
1000
1001 BasicBlock *BB = CI->getParent();
1002 Function *F = BB->getParent();
1003 ReplacementIRBuilder Builder(CI, *DL);
1004 LLVMContext &Ctx = Builder.getContext();
1005
1006 BasicBlock *EndBB =
1007 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1008 auto FailureBB =
1009 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1010 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1011
1012 // The split call above "helpfully" added a branch at the end of BB
1013 // (to the wrong place).
1014 std::prev(BB->end())->eraseFromParent();
1015 Builder.SetInsertPoint(BB);
1016
1017 PartwordMaskValues PMV =
1018 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1019 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1020
1021 // Shift the incoming values over, into the right location in the word.
1022 Value *NewVal_Shifted =
1023 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1024 Value *Cmp_Shifted =
1025 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1026
1027 // Load the entire current word, and mask into place the expected and new
1028 // values
1029 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1030 InitLoaded->setVolatile(CI->isVolatile());
1031 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1032 Builder.CreateBr(LoopBB);
1033
1034 // partword.cmpxchg.loop:
1035 Builder.SetInsertPoint(LoopBB);
1036 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1037 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1038
1039 // Mask/Or the expected and new values into place in the loaded word.
1040 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1041 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1042 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1043 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1044 CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
1045 NewCI->setVolatile(CI->isVolatile());
1046 // When we're building a strong cmpxchg, we need a loop, so you
1047 // might think we could use a weak cmpxchg inside. But, using strong
1048 // allows the below comparison for ShouldContinue, and we're
1049 // expecting the underlying cmpxchg to be a machine instruction,
1050 // which is strong anyways.
1051 NewCI->setWeak(CI->isWeak());
1052
1053 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1054 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1055
1056 if (CI->isWeak())
1057 Builder.CreateBr(EndBB);
1058 else
1059 Builder.CreateCondBr(Success, EndBB, FailureBB);
1060
1061 // partword.cmpxchg.failure:
1062 Builder.SetInsertPoint(FailureBB);
1063 // Upon failure, verify that the masked-out part of the loaded value
1064 // has been modified. If it didn't, abort the cmpxchg, since the
1065 // masked-in part must've.
1066 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1067 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1068 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1069
1070 // Add the second value to the phi from above
1071 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1072
1073 // partword.cmpxchg.end:
1074 Builder.SetInsertPoint(CI);
1075
1076 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1077 Value *Res = PoisonValue::get(CI->getType());
1078 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1079 Res = Builder.CreateInsertValue(Res, Success, 1);
1080
1081 CI->replaceAllUsesWith(Res);
1082 CI->eraseFromParent();
1083 return true;
1084}
1085
1086void AtomicExpandImpl::expandAtomicOpToLLSC(
1087 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1088 AtomicOrdering MemOpOrder,
1089 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1090 ReplacementIRBuilder Builder(I, *DL);
1091 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1092 MemOpOrder, PerformOp);
1093
1094 I->replaceAllUsesWith(Loaded);
1095 I->eraseFromParent();
1096}
1097
1098void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1099 ReplacementIRBuilder Builder(AI, *DL);
1100
1101 PartwordMaskValues PMV =
1102 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1103 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1104
1105 // The value operand must be sign-extended for signed min/max so that the
1106 // target's signed comparison instructions can be used. Otherwise, just
1107 // zero-ext.
1108 Instruction::CastOps CastOp = Instruction::ZExt;
1109 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1110 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1111 CastOp = Instruction::SExt;
1112
1113 Value *ValOperand_Shifted = Builder.CreateShl(
1114 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1115 PMV.ShiftAmt, "ValOperand_Shifted");
1116 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1117 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1118 AI->getOrdering());
1119 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1120 AI->replaceAllUsesWith(FinalOldResult);
1121 AI->eraseFromParent();
1122}
1123
1124void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1125 AtomicCmpXchgInst *CI) {
1126 ReplacementIRBuilder Builder(CI, *DL);
1127
1128 PartwordMaskValues PMV = createMaskInstrs(
1129 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1130 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1131
1132 Value *CmpVal_Shifted = Builder.CreateShl(
1133 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1134 "CmpVal_Shifted");
1135 Value *NewVal_Shifted = Builder.CreateShl(
1136 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1137 "NewVal_Shifted");
1138 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1139 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1140 CI->getMergedOrdering());
1141 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1142 Value *Res = PoisonValue::get(CI->getType());
1143 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1144 Value *Success = Builder.CreateICmpEQ(
1145 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1146 Res = Builder.CreateInsertValue(Res, Success, 1);
1147
1148 CI->replaceAllUsesWith(Res);
1149 CI->eraseFromParent();
1150}
1151
1152Value *AtomicExpandImpl::insertRMWLLSCLoop(
1153 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1154 AtomicOrdering MemOpOrder,
1155 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1156 LLVMContext &Ctx = Builder.getContext();
1157 BasicBlock *BB = Builder.GetInsertBlock();
1158 Function *F = BB->getParent();
1159
1160 assert(AddrAlign >=
1161 F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1162 "Expected at least natural alignment at this point.");
1163
1164 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1165 //
1166 // The standard expansion we produce is:
1167 // [...]
1168 // atomicrmw.start:
1169 // %loaded = @load.linked(%addr)
1170 // %new = some_op iN %loaded, %incr
1171 // %stored = @store_conditional(%new, %addr)
1172 // %try_again = icmp i32 ne %stored, 0
1173 // br i1 %try_again, label %loop, label %atomicrmw.end
1174 // atomicrmw.end:
1175 // [...]
1176 BasicBlock *ExitBB =
1177 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1178 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1179
1180 // The split call above "helpfully" added a branch at the end of BB (to the
1181 // wrong place).
1182 std::prev(BB->end())->eraseFromParent();
1183 Builder.SetInsertPoint(BB);
1184 Builder.CreateBr(LoopBB);
1185
1186 // Start the main loop block now that we've taken care of the preliminaries.
1187 Builder.SetInsertPoint(LoopBB);
1188 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1189
1190 Value *NewVal = PerformOp(Builder, Loaded);
1191
1192 Value *StoreSuccess =
1193 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1194 Value *TryAgain = Builder.CreateICmpNE(
1195 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1196 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1197
1198 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1199 return Loaded;
1200}
1201
1202/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1203/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1204/// IR. As a migration step, we convert back to what used to be the standard
1205/// way to represent a pointer cmpxchg so that we can update backends one by
1206/// one.
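/// For example, a cmpxchg of two pointer values becomes a cmpxchg of the
/// corresponding ptrtoint-converted integers, and the loaded result is
/// converted back with inttoptr before rebuilding the { iN, i1 } result.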
1207AtomicCmpXchgInst *
1208AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1209 auto *M = CI->getModule();
1210 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1211 M->getDataLayout());
1212
1213 ReplacementIRBuilder Builder(CI, *DL);
1214
1215 Value *Addr = CI->getPointerOperand();
1216
1217 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1218 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1219
1220 auto *NewCI = Builder.CreateAtomicCmpXchg(
1221 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1222 CI->getFailureOrdering(), CI->getSyncScopeID());
1223 NewCI->setVolatile(CI->isVolatile());
1224 NewCI->setWeak(CI->isWeak());
1225 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1226
1227 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1228 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1229
1230 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1231
1232 Value *Res = PoisonValue::get(CI->getType());
1233 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1234 Res = Builder.CreateInsertValue(Res, Succ, 1);
1235
1236 CI->replaceAllUsesWith(Res);
1237 CI->eraseFromParent();
1238 return NewCI;
1239}
1240
1241bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1242 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1243 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1244 Value *Addr = CI->getPointerOperand();
1245 BasicBlock *BB = CI->getParent();
1246 Function *F = BB->getParent();
1247 LLVMContext &Ctx = F->getContext();
1248 // If shouldInsertFencesForAtomic() returns true, then the target does not
1249 // want to deal with memory orders, and emitLeading/TrailingFence should take
1250 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1251 // should preserve the ordering.
1252 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1253 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1254 ? AtomicOrdering::Monotonic
1255 : CI->getMergedOrdering();
1256
1257 // In implementations which use a barrier to achieve release semantics, we can
1258 // delay emitting this barrier until we know a store is actually going to be
1259 // attempted. The cost of this delay is that we need 2 copies of the block
1260 // emitting the load-linked, affecting code size.
1261 //
1262 // Ideally, this logic would be unconditional except for the minsize check
1263 // since in other cases the extra blocks naturally collapse down to the
1264 // minimal loop. Unfortunately, this puts too much stress on later
1265 // optimisations so we avoid emitting the extra logic in those cases too.
1266 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1267 SuccessOrder != AtomicOrdering::Monotonic &&
1268 SuccessOrder != AtomicOrdering::Acquire &&
1269 !F->hasMinSize();
1270
1271 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1272 // do it even on minsize.
1273 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1274
1275 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1276 //
1277 // The full expansion we produce is:
1278 // [...]
1279 // %aligned.addr = ...
1280 // cmpxchg.start:
1281 // %unreleasedload = @load.linked(%aligned.addr)
1282 // %unreleasedload.extract = extract value from %unreleasedload
1283 // %should_store = icmp eq %unreleasedload.extract, %desired
1284 // br i1 %should_store, label %cmpxchg.releasingstore,
1285 // label %cmpxchg.nostore
1286 // cmpxchg.releasingstore:
1287 // fence?
1288 // br label cmpxchg.trystore
1289 // cmpxchg.trystore:
1290 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1291 // [%releasedload, %cmpxchg.releasedload]
1292 // %updated.new = insert %new into %loaded.trystore
1293 // %stored = @store_conditional(%updated.new, %aligned.addr)
1294 // %success = icmp eq i32 %stored, 0
1295 // br i1 %success, label %cmpxchg.success,
1296 // label %cmpxchg.releasedload/%cmpxchg.failure
1297 // cmpxchg.releasedload:
1298 // %releasedload = @load.linked(%aligned.addr)
1299 // %releasedload.extract = extract value from %releasedload
1300 // %should_store = icmp eq %releasedload.extract, %desired
1301 // br i1 %should_store, label %cmpxchg.trystore,
1302 // label %cmpxchg.failure
1303 // cmpxchg.success:
1304 // fence?
1305 // br label %cmpxchg.end
1306 // cmpxchg.nostore:
1307 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1308 // [%releasedload,
1309 // %cmpxchg.releasedload/%cmpxchg.trystore]
1310 // @load_linked_fail_balance()?
1311 // br label %cmpxchg.failure
1312 // cmpxchg.failure:
1313 // fence?
1314 // br label %cmpxchg.end
1315 // cmpxchg.end:
1316 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1317 // [%loaded.trystore, %cmpxchg.trystore]
1318 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1319 // %loaded = extract value from %loaded.exit
1320 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1321 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1322 // [...]
1323 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1324 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1325 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1326 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1327 auto ReleasedLoadBB =
1328 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1329 auto TryStoreBB =
1330 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1331 auto ReleasingStoreBB =
1332 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1333 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1334
1335 ReplacementIRBuilder Builder(CI, *DL);
1336
1337 // The split call above "helpfully" added a branch at the end of BB (to the
1338 // wrong place), but we might want a fence too. It's easiest to just remove
1339 // the branch entirely.
1340 std::prev(BB->end())->eraseFromParent();
1341 Builder.SetInsertPoint(BB);
1342 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1343 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1344
1345 PartwordMaskValues PMV =
1346 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1347 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1348 Builder.CreateBr(StartBB);
1349
1350 // Start the main loop block now that we've taken care of the preliminaries.
1351 Builder.SetInsertPoint(StartBB);
1352 Value *UnreleasedLoad =
1353 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1354 Value *UnreleasedLoadExtract =
1355 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1356 Value *ShouldStore = Builder.CreateICmpEQ(
1357 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1358
1359 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1360 // jump straight past that fence instruction (if it exists).
1361 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1362
1363 Builder.SetInsertPoint(ReleasingStoreBB);
1364 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1365 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1366 Builder.CreateBr(TryStoreBB);
1367
1368 Builder.SetInsertPoint(TryStoreBB);
1369 PHINode *LoadedTryStore =
1370 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1371 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1372 Value *NewValueInsert =
1373 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1374 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1375 PMV.AlignedAddr, MemOpOrder);
1376 StoreSuccess = Builder.CreateICmpEQ(
1377 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1378 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1379 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1380 CI->isWeak() ? FailureBB : RetryBB);
1381
1382 Builder.SetInsertPoint(ReleasedLoadBB);
1383 Value *SecondLoad;
1384 if (HasReleasedLoadBB) {
1385 SecondLoad =
1386 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1387 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1388 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1389 CI->getCompareOperand(), "should_store");
1390
1391 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1392 // jump straight past that fence instruction (if it exists).
1393 Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1394 // Update PHI node in TryStoreBB.
1395 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1396 } else
1397 Builder.CreateUnreachable();
1398
1399 // Make sure later instructions don't get reordered with a fence if
1400 // necessary.
1401 Builder.SetInsertPoint(SuccessBB);
1402 if (ShouldInsertFencesForAtomic ||
1403 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1404 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1405 Builder.CreateBr(ExitBB);
1406
1407 Builder.SetInsertPoint(NoStoreBB);
1408 PHINode *LoadedNoStore =
1409 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1410 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1411 if (HasReleasedLoadBB)
1412 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1413
1414 // In the failing case, where we don't execute the store-conditional, the
1415 // target might want to balance out the load-linked with a dedicated
1416 // instruction (e.g., on ARM, clearing the exclusive monitor).
1417 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1418 Builder.CreateBr(FailureBB);
1419
1420 Builder.SetInsertPoint(FailureBB);
1421 PHINode *LoadedFailure =
1422 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1423 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1424 if (CI->isWeak())
1425 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1426 if (ShouldInsertFencesForAtomic)
1427 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1428 Builder.CreateBr(ExitBB);
1429
1430 // Finally, we have control-flow based knowledge of whether the cmpxchg
1431 // succeeded or not. We expose this to later passes by converting any
1432 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1433 // PHI.
1434 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1435 PHINode *LoadedExit =
1436 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1437 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1438 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1439 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1440 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1441 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1442
1443 // This is the "exit value" from the cmpxchg expansion. It may be of
1444 // a type wider than the one in the cmpxchg instruction.
1445 Value *LoadedFull = LoadedExit;
1446
1447 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1448 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1449
1450 // Look for any users of the cmpxchg that are just comparing the loaded value
1451 // against the desired one, and replace them with the CFG-derived version.
1452 SmallVector<ExtractValueInst *, 2> PrunedInsts;
1453 for (auto *User : CI->users()) {
1454 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1455 if (!EV)
1456 continue;
1457
1458 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1459 "weird extraction from { iN, i1 }");
1460
1461 if (EV->getIndices()[0] == 0)
1462 EV->replaceAllUsesWith(Loaded);
1463 else
1464 EV->replaceAllUsesWith(Success);
1465
1466 PrunedInsts.push_back(EV);
1467 }
1468
1469 // We can remove the instructions now we're no longer iterating through them.
1470 for (auto *EV : PrunedInsts)
1471 EV->eraseFromParent();
1472
1473 if (!CI->use_empty()) {
1474 // Some use of the full struct return that we don't understand has happened,
1475 // so we've got to reconstruct it properly.
1476 Value *Res;
1477 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1478 Res = Builder.CreateInsertValue(Res, Success, 1);
1479
1480 CI->replaceAllUsesWith(Res);
1481 }
1482
1483 CI->eraseFromParent();
1484 return true;
1485}
1486
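// An atomicrmw is idempotent when it can never change the stored value:
// add/sub/or/xor with 0, or "and" with -1. Such operations may be rewritten
// by the target as a fenced atomic load via lowerIdempotentRMWIntoFencedLoad.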
1487bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1488 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1489 if (!C)
1490 return false;
1491
1492 AtomicRMWInst::BinOp Op = RMWI->getOperation();
1493 switch (Op) {
1494 case AtomicRMWInst::Add:
1495 case AtomicRMWInst::Sub:
1496 case AtomicRMWInst::Or:
1497 case AtomicRMWInst::Xor:
1498 return C->isZero();
1499 case AtomicRMWInst::And:
1500 return C->isMinusOne();
1501 // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1502 default:
1503 return false;
1504 }
1505}
1506
1507bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1508 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1509 tryExpandAtomicLoad(ResultingLoad);
1510 return true;
1511 }
1512 return false;
1513}
1514
1515Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1516 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1517 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1518 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1519 CreateCmpXchgInstFun CreateCmpXchg) {
1520 LLVMContext &Ctx = Builder.getContext();
1521 BasicBlock *BB = Builder.GetInsertBlock();
1522 Function *F = BB->getParent();
1523
1524 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1525 //
1526 // The standard expansion we produce is:
1527 // [...]
1528 // %init_loaded = load atomic iN* %addr
1529 // br label %loop
1530 // loop:
1531 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1532 // %new = some_op iN %loaded, %incr
1533 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1534 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1535 // %success = extractvalue { iN, i1 } %pair, 1
1536 // br i1 %success, label %atomicrmw.end, label %loop
1537 // atomicrmw.end:
1538 // [...]
1539 BasicBlock *ExitBB =
1540 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1541 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1542
1543 // The split call above "helpfully" added a branch at the end of BB (to the
1544 // wrong place), but we want a load. It's easiest to just remove
1545 // the branch entirely.
1546 std::prev(BB->end())->eraseFromParent();
1547 Builder.SetInsertPoint(BB);
1548 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1549 Builder.CreateBr(LoopBB);
1550
1551 // Start the main loop block now that we've taken care of the preliminaries.
1552 Builder.SetInsertPoint(LoopBB);
1553 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1554 Loaded->addIncoming(InitLoaded, BB);
1555
1556 Value *NewVal = PerformOp(Builder, Loaded);
1557
1558 Value *NewLoaded = nullptr;
1559 Value *Success = nullptr;
1560
1561 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1562 MemOpOrder == AtomicOrdering::Unordered
1563                    ? AtomicOrdering::Monotonic
1564                    : MemOpOrder,
1565 SSID, Success, NewLoaded);
1566 assert(Success && NewLoaded);
1567
1568 Loaded->addIncoming(NewLoaded, LoopBB);
1569
1570 Builder.CreateCondBr(Success, ExitBB, LoopBB);
1571
1572 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1573 return NewLoaded;
1574}
1575
1576bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1577 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1578 unsigned ValueSize = getAtomicOpSize(CI);
1579
1580 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1581 default:
1582 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1583  case TargetLoweringBase::AtomicExpansionKind::None:
1584    if (ValueSize < MinCASSize)
1585      return expandPartwordCmpXchg(CI);
1586    return false;
1587  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1588    return expandAtomicCmpXchg(CI);
1589  }
1590  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1591    expandAtomicCmpXchgToMaskedIntrinsic(CI);
1592    return true;
1593  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1594    return lowerAtomicCmpXchgInst(CI);
1595  }
1596}
1597
1598// Note: This function is exposed externally by AtomicExpandUtils.h
1599bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1600                                    CreateCmpXchgInstFun CreateCmpXchg) {
1601 ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
1602 Builder.setIsFPConstrained(
1603 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1604
1605 // FIXME: If FP exceptions are observable, we should force them off for the
1606 // loop for the FP atomics.
1607 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1608 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1609 AI->getOrdering(), AI->getSyncScopeID(),
1610 [&](IRBuilderBase &Builder, Value *Loaded) {
1611 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1612 AI->getValOperand());
1613 },
1614 CreateCmpXchg);
1615
1616 AI->replaceAllUsesWith(Loaded);
1617 AI->eraseFromParent();
1618 return true;
1619}
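// Editorial sketch (not part of the upstream source): a minimal
// CreateCmpXchgInstFun that a caller could pass to expandAtomicRMWToCmpXchg.
// It simply emits a cmpxchg and unpacks the { iN, i1 } result pair, much like
// the lambda used in expandAtomicRMWToLibcall below; the name SimpleCmpXchg
// is hypothetical.
//
//   auto SimpleCmpXchg = [](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
//                           Value *NewVal, Align Alignment,
//                           AtomicOrdering MemOpOrder, SyncScope::ID SSID,
//                           Value *&Success, Value *&NewLoaded) {
//     AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
//         Addr, Loaded, NewVal, Alignment, MemOpOrder,
//         AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
//     Success = Builder.CreateExtractValue(Pair, 1, "success");
//     NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
//   };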
1620
1621// In order to use one of the sized library calls such as
1622// __atomic_fetch_add_4, the alignment must be sufficient, the size
1623// must be one of the potentially-specialized sizes, and the value
1624// type must actually exist in C on the target (otherwise, the
1625// function wouldn't actually be defined.)
1626static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1627 const DataLayout &DL) {
1628 // TODO: "LargestSize" is an approximation for "largest type that
1629 // you can express in C". It seems to be the case that int128 is
1630 // supported on all 64-bit platforms, otherwise only up to 64-bit
1631 // integers are supported. If we get this wrong, then we'll try to
1632 // call a sized libcall that doesn't actually exist. There should
1633 // really be some more reliable way in LLVM of determining integer
1634 // sizes which are valid in the target's C ABI...
1635 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1636 return Alignment >= Size &&
1637 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1638 Size <= LargestSize;
1639}
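// Editorial illustration (not part of the upstream source): on a typical
// 64-bit target, getLargestLegalIntTypeSizeInBits() >= 64, so LargestSize is
// 16. A naturally aligned 4-byte operation then qualifies for a sized call
// such as __atomic_fetch_add_4, while a 16-byte operation that is only
// 8-byte aligned fails the Alignment >= Size test and falls back to the
// generic __atomic_* form that takes an explicit size_t size argument.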
1640
1641void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1642 static const RTLIB::Libcall Libcalls[6] = {
1643 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1644 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1645 unsigned Size = getAtomicOpSize(I);
1646
1647 bool expanded = expandAtomicOpToLibcall(
1648 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1649 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1650 if (!expanded)
1651 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
1652}
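// Editorial illustration (not part of the upstream source): for a naturally
// aligned 4-byte load such as
//
//   %v = load atomic i32, ptr %p seq_cst, align 4
//
// the sized-libcall path above emits, roughly,
//
//   %v = call i32 @__atomic_load_4(ptr %p, i32 5)
//
// where 5 is the C ABI encoding of seq_cst produced by toCABI().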
1653
1654void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1655 static const RTLIB::Libcall Libcalls[6] = {
1656 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1657 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1658 unsigned Size = getAtomicOpSize(I);
1659
1660 bool expanded = expandAtomicOpToLibcall(
1661 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1662 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1663 if (!expanded)
1664 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
1665}
1666
1667void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1668 static const RTLIB::Libcall Libcalls[6] = {
1669 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1670 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1671 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1672 unsigned Size = getAtomicOpSize(I);
1673
1674 bool expanded = expandAtomicOpToLibcall(
1675 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1676 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1677 Libcalls);
1678 if (!expanded)
1679 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
1680}
1681
1682static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1683  static const RTLIB::Libcall LibcallsXchg[6] = {
1684 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1685 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1686 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1687 static const RTLIB::Libcall LibcallsAdd[6] = {
1688 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1689 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1690 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1691 static const RTLIB::Libcall LibcallsSub[6] = {
1692 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1693 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1694 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1695 static const RTLIB::Libcall LibcallsAnd[6] = {
1696 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1697 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1698 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1699 static const RTLIB::Libcall LibcallsOr[6] = {
1700 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1701 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1702 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1703 static const RTLIB::Libcall LibcallsXor[6] = {
1704 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1705 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1706 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1707 static const RTLIB::Libcall LibcallsNand[6] = {
1708 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1709 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1710 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1711
1712 switch (Op) {
1713  case AtomicRMWInst::BAD_BINOP:
1714    llvm_unreachable("Should not have BAD_BINOP.");
1715  case AtomicRMWInst::Xchg:
1716    return ArrayRef(LibcallsXchg);
1717 case AtomicRMWInst::Add:
1718 return ArrayRef(LibcallsAdd);
1719 case AtomicRMWInst::Sub:
1720 return ArrayRef(LibcallsSub);
1721 case AtomicRMWInst::And:
1722 return ArrayRef(LibcallsAnd);
1723 case AtomicRMWInst::Or:
1724 return ArrayRef(LibcallsOr);
1725 case AtomicRMWInst::Xor:
1726 return ArrayRef(LibcallsXor);
1727  case AtomicRMWInst::Nand:
1728    return ArrayRef(LibcallsNand);
1729 case AtomicRMWInst::Max:
1730 case AtomicRMWInst::Min:
1731  case AtomicRMWInst::UMax:
1732  case AtomicRMWInst::UMin:
1733  case AtomicRMWInst::FMax:
1734  case AtomicRMWInst::FMin:
1735  case AtomicRMWInst::FAdd:
1736  case AtomicRMWInst::FSub:
1737  case AtomicRMWInst::UIncWrap:
1738  case AtomicRMWInst::UDecWrap:
1739    // No atomic libcalls are available for max/min/umax/umin.
1740 return {};
1741 }
1742 llvm_unreachable("Unexpected AtomicRMW operation.");
1743}
1744
1745void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1746 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1747
1748 unsigned Size = getAtomicOpSize(I);
1749
1750 bool Success = false;
1751 if (!Libcalls.empty())
1752 Success = expandAtomicOpToLibcall(
1753 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1754 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1755
1756 // The expansion failed: either there were no libcalls at all for
1757 // the operation (min/max), or there were only size-specialized
1758 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1759 // CAS libcall, via a CAS loop, instead.
1760 if (!Success) {
1761    expandAtomicRMWToCmpXchg(
1762        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1763 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1764 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
1765 // Create the CAS instruction normally...
1766 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1767 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1768              AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1769          Success = Builder.CreateExtractValue(Pair, 1, "success");
1770 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1771
1772 // ...and then expand the CAS into a libcall.
1773 expandAtomicCASToLibcall(Pair);
1774 });
1775 }
1776}
1777
1778// A helper routine for the above expandAtomic*ToLibcall functions.
1779//
1780// 'Libcalls' contains an array of enum values for the particular
1781// ATOMIC libcalls to be emitted. All of the other arguments besides
1782// 'I' are extracted from the Instruction subclass by the
1783// caller. Depending on the particular call, some will be null.
1784bool AtomicExpandImpl::expandAtomicOpToLibcall(
1785 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1786 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1787 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1788 assert(Libcalls.size() == 6);
1789
1790 LLVMContext &Ctx = I->getContext();
1791 Module *M = I->getModule();
1792 const DataLayout &DL = M->getDataLayout();
1793 IRBuilder<> Builder(I);
1794 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1795
1796 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1797 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1798
1799 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1800
1801 // TODO: the "order" argument type is "int", not int32. So
1802 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1803 ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1804 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1805 Constant *OrderingVal =
1806 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1807 Constant *Ordering2Val = nullptr;
1808 if (CASExpected) {
1809 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1810 Ordering2Val =
1811 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1812 }
1813 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1814
1815 RTLIB::Libcall RTLibType;
1816 if (UseSizedLibcall) {
1817 switch (Size) {
1818 case 1:
1819 RTLibType = Libcalls[1];
1820 break;
1821 case 2:
1822 RTLibType = Libcalls[2];
1823 break;
1824 case 4:
1825 RTLibType = Libcalls[3];
1826 break;
1827 case 8:
1828 RTLibType = Libcalls[4];
1829 break;
1830 case 16:
1831 RTLibType = Libcalls[5];
1832 break;
1833 }
1834 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1835 RTLibType = Libcalls[0];
1836 } else {
1837 // Can't use sized function, and there's no generic for this
1838 // operation, so give up.
1839 return false;
1840 }
1841
1842 if (!TLI->getLibcallName(RTLibType)) {
1843 // This target does not implement the requested atomic libcall so give up.
1844 return false;
1845 }
1846
1847  // Build up the function call. There are two kinds. First, the sized
1848 // variants. These calls are going to be one of the following (with
1849 // N=1,2,4,8,16):
1850 // iN __atomic_load_N(iN *ptr, int ordering)
1851 // void __atomic_store_N(iN *ptr, iN val, int ordering)
1852 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1853 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1854 // int success_order, int failure_order)
1855 //
1856 // Note that these functions can be used for non-integer atomic
1857  // operations; the values just need to be bitcast to integers on the
1858 // way in and out.
1859 //
1860 // And, then, the generic variants. They look like the following:
1861 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1862 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1863 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1864 // int ordering)
1865 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1866 // void *desired, int success_order,
1867 // int failure_order)
1868 //
1869 // The different signatures are built up depending on the
1870 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1871 // variables.
1872
1873 AllocaInst *AllocaCASExpected = nullptr;
1874 AllocaInst *AllocaValue = nullptr;
1875 AllocaInst *AllocaResult = nullptr;
1876
1877 Type *ResultTy;
1878  SmallVector<Value *, 6> Args;
1879  AttributeList Attr;
1880
1881 // 'size' argument.
1882 if (!UseSizedLibcall) {
1883 // Note, getIntPtrType is assumed equivalent to size_t.
1884 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1885 }
1886
1887 // 'ptr' argument.
1888 // note: This assumes all address spaces share a common libfunc
1889  // implementation and that addresses are convertible. For systems without
1890 // that property, we'd need to extend this mechanism to support AS-specific
1891 // families of atomic intrinsics.
1892 Value *PtrVal = PointerOperand;
1893 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
1894 Args.push_back(PtrVal);
1895
1896 // 'expected' argument, if present.
1897 if (CASExpected) {
1898 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1899 AllocaCASExpected->setAlignment(AllocaAlignment);
1900 Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
1901 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1902 Args.push_back(AllocaCASExpected);
1903 }
1904
1905 // 'val' argument ('desired' for cas), if present.
1906 if (ValueOperand) {
1907 if (UseSizedLibcall) {
1908 Value *IntValue =
1909 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1910 Args.push_back(IntValue);
1911 } else {
1912 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1913 AllocaValue->setAlignment(AllocaAlignment);
1914 Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
1915 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1916 Args.push_back(AllocaValue);
1917 }
1918 }
1919
1920 // 'ret' argument.
1921 if (!CASExpected && HasResult && !UseSizedLibcall) {
1922 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1923 AllocaResult->setAlignment(AllocaAlignment);
1924 Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
1925 Args.push_back(AllocaResult);
1926 }
1927
1928 // 'ordering' ('success_order' for cas) argument.
1929 Args.push_back(OrderingVal);
1930
1931 // 'failure_order' argument, if present.
1932 if (Ordering2Val)
1933 Args.push_back(Ordering2Val);
1934
1935 // Now, the return type.
1936 if (CASExpected) {
1937 ResultTy = Type::getInt1Ty(Ctx);
1938 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
1939 } else if (HasResult && UseSizedLibcall)
1940 ResultTy = SizedIntTy;
1941 else
1942 ResultTy = Type::getVoidTy(Ctx);
1943
1944 // Done with setting up arguments and return types, create the call:
1945  SmallVector<Type *, 6> ArgTys;
1946  for (Value *Arg : Args)
1947 ArgTys.push_back(Arg->getType());
1948 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1949 FunctionCallee LibcallFn =
1950 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1951 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1952 Call->setAttributes(Attr);
1953 Value *Result = Call;
1954
1955 // And then, extract the results...
1956 if (ValueOperand && !UseSizedLibcall)
1957 Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);
1958
1959 if (CASExpected) {
1960 // The final result from the CAS is {load of 'expected' alloca, bool result
1961 // from call}
1962 Type *FinalResultTy = I->getType();
1963 Value *V = PoisonValue::get(FinalResultTy);
1964 Value *ExpectedOut = Builder.CreateAlignedLoad(
1965 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
1966 Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
1967 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1968 V = Builder.CreateInsertValue(V, Result, 1);
1969 I->replaceAllUsesWith(V);
1970 } else if (HasResult) {
1971 Value *V;
1972 if (UseSizedLibcall)
1973 V = Builder.CreateBitOrPointerCast(Result, I->getType());
1974 else {
1975 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
1976 AllocaAlignment);
1977 Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
1978 }
1979 I->replaceAllUsesWith(V);
1980 }
1981 I->eraseFromParent();
1982 return true;
1983}