1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target-specific instructions which implement
11// the same semantics in a way that better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
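14// For example, on a target that cannot lower it natively, an IR-level
14//   %old = atomicrmw add ptr %p, i32 1 seq_cst
14// may be rewritten into the compare-exchange loop built by
14// insertRMWCmpXchgLoop() below, while an atomic operation whose size or
14// alignment is unsupported is turned into an __atomic_* libcall
14// (illustrative sketch only).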
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constant.h"
32#include "llvm/IR/Constants.h"
33#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Module.h"
41#include "llvm/IR/Type.h"
42#include "llvm/IR/User.h"
43#include "llvm/IR/Value.h"
45#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
53#include <cassert>
54#include <cstdint>
55#include <iterator>
56
57using namespace llvm;
58
59#define DEBUG_TYPE "atomic-expand"
60
61namespace {
62
63class AtomicExpandImpl {
64 const TargetLowering *TLI = nullptr;
65 const DataLayout *DL = nullptr;
66
67private:
68 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
69 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
70 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
71 bool tryExpandAtomicLoad(LoadInst *LI);
72 bool expandAtomicLoadToLL(LoadInst *LI);
73 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
74 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
75 bool tryExpandAtomicStore(StoreInst *SI);
76 void expandAtomicStore(StoreInst *SI);
77 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
78 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
79 Value *
80 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
81 Align AddrAlign, AtomicOrdering MemOpOrder,
82 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
83 void expandAtomicOpToLLSC(
84 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
85 AtomicOrdering MemOpOrder,
86 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
87 void expandPartwordAtomicRMW(
88 AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
89 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
90 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
91 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
92 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
93
94 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
95 static Value *insertRMWCmpXchgLoop(
96 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
97 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
98 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
99 CreateCmpXchgInstFun CreateCmpXchg);
100 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
101
102 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
103 bool isIdempotentRMW(AtomicRMWInst *RMWI);
104 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
105
106 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
107 Value *PointerOperand, Value *ValueOperand,
108 Value *CASExpected, AtomicOrdering Ordering,
109 AtomicOrdering Ordering2,
110 ArrayRef<RTLIB::Libcall> Libcalls);
111 void expandAtomicLoadToLibcall(LoadInst *LI);
112 void expandAtomicStoreToLibcall(StoreInst *LI);
113 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
114 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
115
116 friend bool
117 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
118 CreateCmpXchgInstFun CreateCmpXchg);
119
120public:
121 bool run(Function &F, const TargetMachine *TM);
122};
123
124class AtomicExpandLegacy : public FunctionPass {
125public:
126 static char ID; // Pass identification, replacement for typeid
127
128 AtomicExpandLegacy() : FunctionPass(ID) {
129 initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
130 }
131
132 bool runOnFunction(Function &F) override;
133};
134
135// IRBuilder to be used for replacement atomic instructions.
136struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> {
137 // Preserves the DebugLoc from I, and preserves still valid metadata.
138 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
139 : IRBuilder(I->getContext(), DL) {
140 SetInsertPoint(I);
141 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
142 }
143};
144
145} // end anonymous namespace
146
147char AtomicExpandLegacy::ID = 0;
148
149char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
150
151INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
152 "Expand Atomic instructions", false, false)
154INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
155 "Expand Atomic instructions", false, false)
156
157// Helper functions to retrieve the size of atomic instructions.
158static unsigned getAtomicOpSize(LoadInst *LI) {
159 const DataLayout &DL = LI->getModule()->getDataLayout();
160 return DL.getTypeStoreSize(LI->getType());
161}
162
163static unsigned getAtomicOpSize(StoreInst *SI) {
164 const DataLayout &DL = SI->getModule()->getDataLayout();
165 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
166}
167
168static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
169 const DataLayout &DL = RMWI->getModule()->getDataLayout();
170 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
171}
172
173static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
174 const DataLayout &DL = CASI->getModule()->getDataLayout();
175 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
176}
177
178// Determine if a particular atomic operation has a supported size,
179// and is of appropriate alignment, to be passed through for target
180// lowering. (Versus turning into a __atomic libcall)
181template <typename Inst>
182static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
183 unsigned Size = getAtomicOpSize(I);
184 Align Alignment = I->getAlign();
185 return Alignment >= Size &&
186 Size <= (TLI->getMaxAtomicSizeInBitsSupported() / 8);
187}
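// For example, on a hypothetical target reporting a 64-bit
// getMaxAtomicSizeInBitsSupported(), a 16-byte cmpxchg or an i64 load that is
// only 4-byte aligned fails this check and is routed to an __atomic_* libcall
// by run() below.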
188
189bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
190 const auto *Subtarget = TM->getSubtargetImpl(F);
191 if (!Subtarget->enableAtomicExpand())
192 return false;
193 TLI = Subtarget->getTargetLowering();
194 DL = &F.getParent()->getDataLayout();
195
196 SmallVector<Instruction *, 1> AtomicInsts;
197
198 // Changing control-flow while iterating through it is a bad idea, so gather a
199 // list of all atomic instructions before we start.
200 for (Instruction &I : instructions(F))
201 if (I.isAtomic() && !isa<FenceInst>(&I))
202 AtomicInsts.push_back(&I);
203
204 bool MadeChange = false;
205 for (auto *I : AtomicInsts) {
206 auto LI = dyn_cast<LoadInst>(I);
207 auto SI = dyn_cast<StoreInst>(I);
208 auto RMWI = dyn_cast<AtomicRMWInst>(I);
209 auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
210 assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
211
212 // If the Size/Alignment is not supported, replace with a libcall.
213 if (LI) {
214 if (!atomicSizeSupported(TLI, LI)) {
215 expandAtomicLoadToLibcall(LI);
216 MadeChange = true;
217 continue;
218 }
219 } else if (SI) {
220 if (!atomicSizeSupported(TLI, SI)) {
221 expandAtomicStoreToLibcall(SI);
222 MadeChange = true;
223 continue;
224 }
225 } else if (RMWI) {
226 if (!atomicSizeSupported(TLI, RMWI)) {
227 expandAtomicRMWToLibcall(RMWI);
228 MadeChange = true;
229 continue;
230 }
231 } else if (CASI) {
232 if (!atomicSizeSupported(TLI, CASI)) {
233 expandAtomicCASToLibcall(CASI);
234 MadeChange = true;
235 continue;
236 }
237 }
238
239 if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
240 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
241 I = LI = convertAtomicLoadToIntegerType(LI);
242 MadeChange = true;
243 } else if (SI &&
244 TLI->shouldCastAtomicStoreInIR(SI) ==
245 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
246 I = SI = convertAtomicStoreToIntegerType(SI);
247 MadeChange = true;
248 } else if (RMWI &&
249 TLI->shouldCastAtomicRMWIInIR(RMWI) ==
250 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
251 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
252 MadeChange = true;
253 } else if (CASI) {
254 // TODO: when we're ready to make the change at the IR level, we can
255 // extend convertCmpXchgToInteger for floating point too.
256 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
257 // TODO: add a TLI hook to control this so that each target can
258 // convert to lowering the original type one at a time.
259 I = CASI = convertCmpXchgToIntegerType(CASI);
260 MadeChange = true;
261 }
262 }
263
264 if (TLI->shouldInsertFencesForAtomic(I)) {
265 auto FenceOrdering = AtomicOrdering::Monotonic;
266 if (LI && isAcquireOrStronger(LI->getOrdering())) {
267 FenceOrdering = LI->getOrdering();
268 LI->setOrdering(AtomicOrdering::Monotonic);
269 } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
270 FenceOrdering = SI->getOrdering();
271 SI->setOrdering(AtomicOrdering::Monotonic);
272 } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
273 isAcquireOrStronger(RMWI->getOrdering()))) {
274 FenceOrdering = RMWI->getOrdering();
275 RMWI->setOrdering(AtomicOrdering::Monotonic);
276 } else if (CASI &&
277 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
278 TargetLoweringBase::AtomicExpansionKind::None &&
279 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
280 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
281 isAcquireOrStronger(CASI->getFailureOrdering()))) {
282 // If a compare and swap is lowered to LL/SC, we can do smarter fence
283 // insertion, with a stronger one on the success path than on the
284 // failure path. As a result, fence insertion is directly done by
285 // expandAtomicCmpXchg in that case.
286 FenceOrdering = CASI->getMergedOrdering();
287 CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
288 CASI->setFailureOrdering(AtomicOrdering::Monotonic);
289 }
290
291 if (FenceOrdering != AtomicOrdering::Monotonic) {
292 MadeChange |= bracketInstWithFences(I, FenceOrdering);
293 }
294 } else if (I->hasAtomicStore() &&
295 TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
296 auto FenceOrdering = AtomicOrdering::Monotonic;
297 if (SI)
298 FenceOrdering = SI->getOrdering();
299 else if (RMWI)
300 FenceOrdering = RMWI->getOrdering();
301 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
302 TargetLoweringBase::AtomicExpansionKind::LLSC)
303 // LLSC is handled in expandAtomicCmpXchg().
304 FenceOrdering = CASI->getSuccessOrdering();
305
306 IRBuilder Builder(I);
307 if (auto TrailingFence =
308 TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
309 TrailingFence->moveAfter(I);
310 MadeChange = true;
311 }
312 }
313
314 if (LI)
315 MadeChange |= tryExpandAtomicLoad(LI);
316 else if (SI)
317 MadeChange |= tryExpandAtomicStore(SI);
318 else if (RMWI) {
319 // There are two different ways of expanding RMW instructions:
320 // - into a load if it is idempotent
321 // - into a Cmpxchg/LL-SC loop otherwise
322 // We try them in that order.
323
324 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
325 MadeChange = true;
326 } else {
327 MadeChange |= tryExpandAtomicRMW(RMWI);
328 }
329 } else if (CASI)
330 MadeChange |= tryExpandAtomicCmpXchg(CASI);
331 }
332 return MadeChange;
333}
334
335bool AtomicExpandLegacy::runOnFunction(Function &F) {
336
337 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
338 if (!TPC)
339 return false;
340 auto *TM = &TPC->getTM<TargetMachine>();
341 AtomicExpandImpl AE;
342 return AE.run(F, TM);
343}
344
345FunctionPass *llvm::createAtomicExpandLegacyPass() {
346 return new AtomicExpandLegacy();
347}
348
349PreservedAnalyses AtomicExpandPass::run(Function &F,
350 FunctionAnalysisManager &AM) {
351 AtomicExpandImpl AE;
352
353 bool Changed = AE.run(F, TM);
354 if (!Changed)
355 return PreservedAnalyses::all();
356
357 return PreservedAnalyses::none();
358}
359
360bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
361 AtomicOrdering Order) {
362 ReplacementIRBuilder Builder(I, *DL);
363
364 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
365
366 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
367 // We have a guard here because not every atomic operation generates a
368 // trailing fence.
369 if (TrailingFence)
370 TrailingFence->moveAfter(I);
371
372 return (LeadingFence || TrailingFence);
373}
374
375/// Get the iX type with the same bitwidth as T.
376IntegerType *
377AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
378 EVT VT = TLI->getMemValueType(DL, T);
379 unsigned BitWidth = VT.getStoreSizeInBits();
380 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
381 return IntegerType::get(T->getContext(), BitWidth);
382}
383
384/// Convert an atomic load of a non-integral type to an integer load of the
385/// equivalent bitwidth. See the function comment on
386/// convertAtomicStoreToIntegerType for background.
387LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
388 auto *M = LI->getModule();
389 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
390
391 ReplacementIRBuilder Builder(LI, *DL);
392
393 Value *Addr = LI->getPointerOperand();
394
395 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
396 NewLI->setAlignment(LI->getAlign());
397 NewLI->setVolatile(LI->isVolatile());
398 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
399 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
400
401 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
402 LI->replaceAllUsesWith(NewVal);
403 LI->eraseFromParent();
404 return NewLI;
405}
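// For example, "load atomic float, ptr %p" becomes "load atomic i32, ptr %p"
// followed by a bitcast of the result back to float.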
406
407AtomicRMWInst *
408AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
409 auto *M = RMWI->getModule();
410 Type *NewTy =
411 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
412
413 ReplacementIRBuilder Builder(RMWI, *DL);
414
415 Value *Addr = RMWI->getPointerOperand();
416 Value *Val = RMWI->getValOperand();
417 Value *NewVal = Val->getType()->isPointerTy()
418 ? Builder.CreatePtrToInt(Val, NewTy)
419 : Builder.CreateBitCast(Val, NewTy);
420
421 auto *NewRMWI = Builder.CreateAtomicRMW(
422 AtomicRMWInst::Xchg, Addr, NewVal, RMWI->getAlign(), RMWI->getOrdering());
423 NewRMWI->setVolatile(RMWI->isVolatile());
424 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
425
426 Value *NewRVal = RMWI->getType()->isPointerTy()
427 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
428 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
429 RMWI->replaceAllUsesWith(NewRVal);
430 RMWI->eraseFromParent();
431 return NewRMWI;
432}
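// For example, "atomicrmw xchg ptr %p, ptr %v" becomes a ptrtoint of %v, an
// i64 "atomicrmw xchg" (on a target with 64-bit pointers), and an inttoptr of
// the result.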
433
434bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
435 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
436 case TargetLoweringBase::AtomicExpansionKind::None:
437 return false;
438 case TargetLoweringBase::AtomicExpansionKind::LLSC:
439 expandAtomicOpToLLSC(
440 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
441 LI->getOrdering(),
442 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
443 return true;
444 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
445 return expandAtomicLoadToLL(LI);
446 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
447 return expandAtomicLoadToCmpXchg(LI);
448 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
449 LI->setAtomic(AtomicOrdering::NotAtomic);
450 return true;
451 default:
452 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
453 }
454}
455
456bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
457 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
458 case TargetLoweringBase::AtomicExpansionKind::None:
459 return false;
460 case TargetLoweringBase::AtomicExpansionKind::Expand:
461 expandAtomicStore(SI);
462 return true;
463 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
464 SI->setAtomic(AtomicOrdering::NotAtomic);
465 return true;
466 default:
467 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
468 }
469}
470
471bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
472 ReplacementIRBuilder Builder(LI, *DL);
473
474 // On some architectures, load-linked instructions are atomic for larger
475 // sizes than normal loads. For example, the only 64-bit load guaranteed
476 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
477 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
478 LI->getPointerOperand(), LI->getOrdering());
479 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
480
481 LI->replaceAllUsesWith(Val);
482 LI->eraseFromParent();
483
484 return true;
485}
486
487bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
488 ReplacementIRBuilder Builder(LI, *DL);
489 AtomicOrdering Order = LI->getOrdering();
490 if (Order == AtomicOrdering::Unordered)
491 Order = AtomicOrdering::Monotonic;
492
493 Value *Addr = LI->getPointerOperand();
494 Type *Ty = LI->getType();
495 Constant *DummyVal = Constant::getNullValue(Ty);
496
497 Value *Pair = Builder.CreateAtomicCmpXchg(
498 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
499 AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
500 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
501
502 LI->replaceAllUsesWith(Loaded);
503 LI->eraseFromParent();
504
505 return true;
506}
507
508/// Convert an atomic store of a non-integral type to an integer store of the
509/// equivalent bitwidth. We used to not support floating point or vector
510/// atomics in the IR at all. The backends learned to deal with the bitcast
511/// idiom because that was the only way of expressing the notion of an atomic
512/// float or vector store. The long term plan is to teach each backend to
513/// instruction select from the original atomic store, but as a migration
514/// mechanism, we convert back to the old format which the backends understand.
515/// Each backend will need individual work to recognize the new format.
516StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
517 ReplacementIRBuilder Builder(SI, *DL);
518 auto *M = SI->getModule();
519 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
520 M->getDataLayout());
521 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
522
523 Value *Addr = SI->getPointerOperand();
524
525 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
526 NewSI->setAlignment(SI->getAlign());
527 NewSI->setVolatile(SI->isVolatile());
528 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
529 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
530 SI->eraseFromParent();
531 return NewSI;
532}
533
534void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
535 // This function is only called on atomic stores that are too large to be
536 // atomic if implemented as a native store. So we replace them with an
537 // atomic swap, which can be implemented, for example, as ldrex/strex on ARM
538 // or lock cmpxchg8/16b on X86, since these are atomic for larger sizes.
539 // It is the responsibility of the target to only signal expansion via
540 // shouldExpandAtomicRMW in cases where this is required and possible.
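 // For example, a target whose ordinary 64-bit stores are not single-copy
 // atomic can signal Expand for "store atomic i64 %v, ptr %p seq_cst"; the
 // store is then rewritten as "atomicrmw xchg ptr %p, i64 %v seq_cst" and
 // handed back to tryExpandAtomicRMW() (illustrative only).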
541 ReplacementIRBuilder Builder(SI, *DL);
542 AtomicOrdering Ordering = SI->getOrdering();
544 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
545 ? AtomicOrdering::Monotonic
546 : Ordering;
547 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
548 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
549 SI->getAlign(), RMWOrdering);
550 SI->eraseFromParent();
551
552 // Now we have an appropriate swap instruction, lower it as usual.
553 tryExpandAtomicRMW(AI);
554}
555
556static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
557 Value *Loaded, Value *NewVal, Align AddrAlign,
558 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
559 Value *&Success, Value *&NewLoaded) {
560 Type *OrigTy = NewVal->getType();
561
562 // This code can go away when cmpxchg supports FP types.
563 assert(!OrigTy->isPointerTy());
564 bool NeedBitcast = OrigTy->isFloatingPointTy();
565 if (NeedBitcast) {
566 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
567 NewVal = Builder.CreateBitCast(NewVal, IntTy);
568 Loaded = Builder.CreateBitCast(Loaded, IntTy);
569 }
570
571 Value *Pair = Builder.CreateAtomicCmpXchg(
572 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
573 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
574 Success = Builder.CreateExtractValue(Pair, 1, "success");
575 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
576
577 if (NeedBitcast)
578 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
579}
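// Note: this helper matches the CreateCmpXchgInstFun signature used by
// insertRMWCmpXchgLoop(); floating-point values are bitcast to integers first
// because cmpxchg only operates on integer or pointer types.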
580
581bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
582 LLVMContext &Ctx = AI->getModule()->getContext();
583 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
584 switch (Kind) {
585 case TargetLoweringBase::AtomicExpansionKind::None:
586 return false;
587 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
588 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
589 unsigned ValueSize = getAtomicOpSize(AI);
590 if (ValueSize < MinCASSize) {
591 expandPartwordAtomicRMW(AI,
592 TargetLoweringBase::AtomicExpansionKind::LLSC);
593 } else {
594 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
595 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
596 AI->getValOperand());
597 };
598 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
599 AI->getAlign(), AI->getOrdering(), PerformOp);
600 }
601 return true;
602 }
603 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
604 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
605 unsigned ValueSize = getAtomicOpSize(AI);
606 if (ValueSize < MinCASSize) {
607 expandPartwordAtomicRMW(AI,
608 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
609 } else {
610 SmallVector<StringRef> SSNs;
611 Ctx.getSyncScopeNames(SSNs);
612 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
613 ? "system"
614 : SSNs[AI->getSyncScopeID()];
615 OptimizationRemarkEmitter ORE(AI->getFunction());
616 ORE.emit([&]() {
617 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
618 << "A compare and swap loop was generated for an atomic "
619 << AI->getOperationName(AI->getOperation()) << " operation at "
620 << MemScope << " memory scope";
621 });
622 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
623 }
624 return true;
625 }
626 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
627 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
628 unsigned ValueSize = getAtomicOpSize(AI);
629 if (ValueSize < MinCASSize) {
630 AtomicRMWInst::BinOp Op = AI->getOperation();
631 // Widen And/Or/Xor and give the target another chance at expanding it.
632 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
633 Op == AtomicRMWInst::And) {
634 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
635 return true;
636 }
637 }
638 expandAtomicRMWToMaskedIntrinsic(AI);
639 return true;
640 }
641 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
642 TLI->emitBitTestAtomicRMWIntrinsic(AI);
643 return true;
644 }
645 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
646 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
647 return true;
648 }
649 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
650 return lowerAtomicRMWInst(AI);
651 case TargetLoweringBase::AtomicExpansionKind::Expand:
652 TLI->emitExpandAtomicRMW(AI);
653 return true;
654 default:
655 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
656 }
657}
658
659namespace {
660
661struct PartwordMaskValues {
662 // These three fields are guaranteed to be set by createMaskInstrs.
663 Type *WordType = nullptr;
664 Type *ValueType = nullptr;
665 Type *IntValueType = nullptr;
666 Value *AlignedAddr = nullptr;
667 Align AlignedAddrAlignment;
668 // The remaining fields can be null.
669 Value *ShiftAmt = nullptr;
670 Value *Mask = nullptr;
671 Value *Inv_Mask = nullptr;
672};
673
675raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
676 auto PrintObj = [&O](auto *V) {
677 if (V)
678 O << *V;
679 else
680 O << "nullptr";
681 O << '\n';
682 };
683 O << "PartwordMaskValues {\n";
684 O << " WordType: ";
685 PrintObj(PMV.WordType);
686 O << " ValueType: ";
687 PrintObj(PMV.ValueType);
688 O << " AlignedAddr: ";
689 PrintObj(PMV.AlignedAddr);
690 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
691 O << " ShiftAmt: ";
692 PrintObj(PMV.ShiftAmt);
693 O << " Mask: ";
694 PrintObj(PMV.Mask);
695 O << " Inv_Mask: ";
696 PrintObj(PMV.Inv_Mask);
697 O << "}\n";
698 return O;
699}
700
701} // end anonymous namespace
702
703/// This is a helper function which builds instructions to provide
704/// values necessary for partword atomic operations. It takes an
705/// incoming address, Addr, and ValueType, and constructs the address,
706/// shift-amounts and masks needed to work with a larger value of size
707/// WordSize.
708///
709/// AlignedAddr: Addr rounded down to a multiple of WordSize
710///
711/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
712/// from AlignAddr for it to have the same value as if
713/// ValueType was loaded from Addr.
714///
715/// Mask: Value to mask with the value loaded from AlignAddr to
716/// include only the part that would've been loaded from Addr.
717///
718/// Inv_Mask: The inverse of Mask.
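718///
718/// Worked example (little-endian, MinWordSize of 4 bytes, an i8 whose
718/// address satisfies Addr % 4 == 2): AlignedAddr = Addr & ~3, ShiftAmt = 16,
718/// Mask = 0x00FF0000 and Inv_Mask = 0xFF00FFFF.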
719static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
720 Instruction *I, Type *ValueType,
721 Value *Addr, Align AddrAlign,
722 unsigned MinWordSize) {
723 PartwordMaskValues PMV;
724
725 Module *M = I->getModule();
726 LLVMContext &Ctx = M->getContext();
727 const DataLayout &DL = M->getDataLayout();
728 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
729
730 PMV.ValueType = PMV.IntValueType = ValueType;
731 if (PMV.ValueType->isFloatingPointTy())
732 PMV.IntValueType =
733 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
734
735 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
736 : ValueType;
737 if (PMV.ValueType == PMV.WordType) {
738 PMV.AlignedAddr = Addr;
739 PMV.AlignedAddrAlignment = AddrAlign;
740 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
741 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
742 return PMV;
743 }
744
745 PMV.AlignedAddrAlignment = Align(MinWordSize);
746
747 assert(ValueSize < MinWordSize);
748
749 PointerType *PtrTy = cast<PointerType>(Addr->getType());
750 IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
751 Value *PtrLSB;
752
753 if (AddrAlign < MinWordSize) {
754 PMV.AlignedAddr = Builder.CreateIntrinsic(
755 Intrinsic::ptrmask, {PtrTy, IntTy},
756 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
757 "AlignedAddr");
758
759 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
760 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
761 } else {
762 // If the alignment is high enough, the low address bits are known to be 0.
763 PMV.AlignedAddr = Addr;
764 PtrLSB = ConstantInt::getNullValue(IntTy);
765 }
766
767 if (DL.isLittleEndian()) {
768 // turn bytes into bits
769 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
770 } else {
771 // turn bytes into bits, and count from the other side.
772 PMV.ShiftAmt = Builder.CreateShl(
773 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
774 }
775
776 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
777 PMV.Mask = Builder.CreateShl(
778 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
779 "Mask");
780
781 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
782
783 return PMV;
784}
785
786static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
787 const PartwordMaskValues &PMV) {
788 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
789 if (PMV.WordType == PMV.ValueType)
790 return WideWord;
791
792 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
793 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
794 return Builder.CreateBitCast(Trunc, PMV.ValueType);
795}
796
797static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
798 Value *Updated, const PartwordMaskValues &PMV) {
799 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
800 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
801 if (PMV.WordType == PMV.ValueType)
802 return Updated;
803
804 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
805
806 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
807 Value *Shift =
808 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
809 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
810 Value *Or = Builder.CreateOr(And, Shift, "inserted");
811 return Or;
812}
813
814/// Emit IR to implement a masked version of a given atomicrmw
815/// operation. (That is, only the bits under the Mask should be
816/// affected by the operation)
817static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
818 IRBuilderBase &Builder, Value *Loaded,
819 Value *Shifted_Inc, Value *Inc,
820 const PartwordMaskValues &PMV) {
821 // TODO: update to use
822 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
823 // to merge bits from two values without requiring PMV.Inv_Mask.
824 switch (Op) {
825 case AtomicRMWInst::Xchg: {
826 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
827 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
828 return FinalVal;
829 }
830 case AtomicRMWInst::Or:
831 case AtomicRMWInst::Xor:
832 case AtomicRMWInst::And:
833 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
834 case AtomicRMWInst::Add:
835 case AtomicRMWInst::Sub:
836 case AtomicRMWInst::Nand: {
837 // The other arithmetic ops need to be masked into place.
838 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
839 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
840 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
841 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
842 return FinalVal;
843 }
854 // Finally, other ops will operate on the full value, so truncate down to
855 // the original size, and expand out again after doing the
856 // operation. Bitcasts will be inserted for FP values.
857 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
858 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
859 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
860 return FinalVal;
861 }
862 default:
863 llvm_unreachable("Unknown atomic op");
864 }
865}
866
867/// Expand a sub-word atomicrmw operation into an appropriate
868/// word-sized operation.
869///
870/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
871/// way as a typical atomicrmw expansion. The only difference here is
872/// that the operation inside of the loop may operate upon only a
873/// part of the value.
874void AtomicExpandImpl::expandPartwordAtomicRMW(
875 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
876 // Widen And/Or/Xor and give the target another chance at expanding it.
877 AtomicRMWInst::BinOp Op = AI->getOperation();
878 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
879 Op == AtomicRMWInst::And) {
880 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
881 return;
882 }
883 AtomicOrdering MemOpOrder = AI->getOrdering();
884 SyncScope::ID SSID = AI->getSyncScopeID();
885
886 ReplacementIRBuilder Builder(AI, *DL);
887
888 PartwordMaskValues PMV =
889 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
890 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
891
892 Value *ValOperand_Shifted = nullptr;
893 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
894 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
895 ValOperand_Shifted =
896 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
897 PMV.ShiftAmt, "ValOperand_Shifted");
898 }
899
900 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
901 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
902 AI->getValOperand(), PMV);
903 };
904
905 Value *OldResult;
906 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
907 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
908 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
909 PerformPartwordOp, createCmpXchgInstFun);
910 } else {
911 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
912 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
913 PMV.AlignedAddrAlignment, MemOpOrder,
914 PerformPartwordOp);
915 }
916
917 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
918 AI->replaceAllUsesWith(FinalOldResult);
919 AI->eraseFromParent();
920}
921
922// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
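922// For Or and Xor, the zero padding produced by the zext/shl below already
922// leaves the neighbouring bytes unchanged; for And, the bits outside the
922// mask must be ones instead, which is why Inv_Mask is OR'd into the operand.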
923AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
924 ReplacementIRBuilder Builder(AI, *DL);
925 AtomicRMWInst::BinOp Op = AI->getOperation();
926
927 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
928 Op == AtomicRMWInst::And) &&
929 "Unable to widen operation");
930
931 PartwordMaskValues PMV =
932 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
933 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
934
935 Value *ValOperand_Shifted =
936 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
937 PMV.ShiftAmt, "ValOperand_Shifted");
938
939 Value *NewOperand;
940
941 if (Op == AtomicRMWInst::And)
942 NewOperand =
943 Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
944 else
945 NewOperand = ValOperand_Shifted;
946
947 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
948 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
949 AI->getOrdering(), AI->getSyncScopeID());
950 // TODO: Preserve metadata
951
952 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
953 AI->replaceAllUsesWith(FinalOldResult);
954 AI->eraseFromParent();
955 return NewAI;
956}
957
958bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
959 // The basic idea here is that we're expanding a cmpxchg of a
960 // smaller memory size up to a word-sized cmpxchg. To do this, we
961 // need to add a retry-loop for strong cmpxchg, so that
962 // modifications to other parts of the word don't cause a spurious
963 // failure.
964
965 // This generates code like the following:
966 // [[Setup mask values PMV.*]]
967 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
968 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
969 // %InitLoaded = load i32* %addr
970 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
971 // br partword.cmpxchg.loop
972 // partword.cmpxchg.loop:
973 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
974 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
975 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
976 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
977 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
978 // i32 %FullWord_NewVal success_ordering failure_ordering
979 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
980 // %Success = extractvalue { i32, i1 } %NewCI, 1
981 // br i1 %Success, label %partword.cmpxchg.end,
982 // label %partword.cmpxchg.failure
983 // partword.cmpxchg.failure:
984 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
985 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
986 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
987 // label %partword.cmpxchg.end
988 // partword.cmpxchg.end:
989 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
990 // %FinalOldVal = trunc i32 %tmp1 to i8
991 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
992 // %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
993
994 Value *Addr = CI->getPointerOperand();
995 Value *Cmp = CI->getCompareOperand();
996 Value *NewVal = CI->getNewValOperand();
997
998 BasicBlock *BB = CI->getParent();
999 Function *F = BB->getParent();
1000 ReplacementIRBuilder Builder(CI, *DL);
1001 LLVMContext &Ctx = Builder.getContext();
1002
1003 BasicBlock *EndBB =
1004 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1005 auto FailureBB =
1006 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1007 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1008
1009 // The split call above "helpfully" added a branch at the end of BB
1010 // (to the wrong place).
1011 std::prev(BB->end())->eraseFromParent();
1012 Builder.SetInsertPoint(BB);
1013
1014 PartwordMaskValues PMV =
1015 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1016 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1017
1018 // Shift the incoming values over, into the right location in the word.
1019 Value *NewVal_Shifted =
1020 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1021 Value *Cmp_Shifted =
1022 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1023
1024 // Load the entire current word, and mask into place the expected and new
1025 // values
1026 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1027 InitLoaded->setVolatile(CI->isVolatile());
1028 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1029 Builder.CreateBr(LoopBB);
1030
1031 // partword.cmpxchg.loop:
1032 Builder.SetInsertPoint(LoopBB);
1033 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1034 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1035
1036 // Mask/Or the expected and new values into place in the loaded word.
1037 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1038 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1039 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1040 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1041 CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
1042 NewCI->setVolatile(CI->isVolatile());
1043 // When we're building a strong cmpxchg, we need a loop, so you
1044 // might think we could use a weak cmpxchg inside. But, using strong
1045 // allows the below comparison for ShouldContinue, and we're
1046 // expecting the underlying cmpxchg to be a machine instruction,
1047 // which is strong anyways.
1048 NewCI->setWeak(CI->isWeak());
1049
1050 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1051 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1052
1053 if (CI->isWeak())
1054 Builder.CreateBr(EndBB);
1055 else
1056 Builder.CreateCondBr(Success, EndBB, FailureBB);
1057
1058 // partword.cmpxchg.failure:
1059 Builder.SetInsertPoint(FailureBB);
1060 // Upon failure, check whether the masked-out part of the loaded value
1061 // has been modified. If it has not, the failure must have come from the
1062 // masked-in part, so abort the cmpxchg rather than retrying.
1063 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1064 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1065 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1066
1067 // Add the second value to the phi from above
1068 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1069
1070 // partword.cmpxchg.end:
1071 Builder.SetInsertPoint(CI);
1072
1073 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1074 Value *Res = PoisonValue::get(CI->getType());
1075 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1076 Res = Builder.CreateInsertValue(Res, Success, 1);
1077
1078 CI->replaceAllUsesWith(Res);
1079 CI->eraseFromParent();
1080 return true;
1081}
1082
1083void AtomicExpandImpl::expandAtomicOpToLLSC(
1084 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1085 AtomicOrdering MemOpOrder,
1086 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1087 ReplacementIRBuilder Builder(I, *DL);
1088 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1089 MemOpOrder, PerformOp);
1090
1091 I->replaceAllUsesWith(Loaded);
1092 I->eraseFromParent();
1093}
1094
1095void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1096 ReplacementIRBuilder Builder(AI, *DL);
1097
1098 PartwordMaskValues PMV =
1099 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1100 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1101
1102 // The value operand must be sign-extended for signed min/max so that the
1103 // target's signed comparison instructions can be used. Otherwise, just
1104 // zero-ext.
1105 Instruction::CastOps CastOp = Instruction::ZExt;
1106 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1107 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1108 CastOp = Instruction::SExt;
1109
1110 Value *ValOperand_Shifted = Builder.CreateShl(
1111 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1112 PMV.ShiftAmt, "ValOperand_Shifted");
1113 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1114 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1115 AI->getOrdering());
1116 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1117 AI->replaceAllUsesWith(FinalOldResult);
1118 AI->eraseFromParent();
1119}
1120
1121void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1122 AtomicCmpXchgInst *CI) {
1123 ReplacementIRBuilder Builder(CI, *DL);
1124
1125 PartwordMaskValues PMV = createMaskInstrs(
1126 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1127 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1128
1129 Value *CmpVal_Shifted = Builder.CreateShl(
1130 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1131 "CmpVal_Shifted");
1132 Value *NewVal_Shifted = Builder.CreateShl(
1133 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1134 "NewVal_Shifted");
1135 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1136 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1137 CI->getMergedOrdering());
1138 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1139 Value *Res = PoisonValue::get(CI->getType());
1140 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1141 Value *Success = Builder.CreateICmpEQ(
1142 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1143 Res = Builder.CreateInsertValue(Res, Success, 1);
1144
1145 CI->replaceAllUsesWith(Res);
1146 CI->eraseFromParent();
1147}
1148
1149Value *AtomicExpandImpl::insertRMWLLSCLoop(
1150 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1151 AtomicOrdering MemOpOrder,
1152 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1153 LLVMContext &Ctx = Builder.getContext();
1154 BasicBlock *BB = Builder.GetInsertBlock();
1155 Function *F = BB->getParent();
1156
1157 assert(AddrAlign >=
1158 F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1159 "Expected at least natural alignment at this point.");
1160
1161 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1162 //
1163 // The standard expansion we produce is:
1164 // [...]
1165 // atomicrmw.start:
1166 // %loaded = @load.linked(%addr)
1167 // %new = some_op iN %loaded, %incr
1168 // %stored = @store_conditional(%new, %addr)
1169 // %try_again = icmp i32 ne %stored, 0
1170 // br i1 %try_again, label %loop, label %atomicrmw.end
1171 // atomicrmw.end:
1172 // [...]
1173 BasicBlock *ExitBB =
1174 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1175 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1176
1177 // The split call above "helpfully" added a branch at the end of BB (to the
1178 // wrong place).
1179 std::prev(BB->end())->eraseFromParent();
1180 Builder.SetInsertPoint(BB);
1181 Builder.CreateBr(LoopBB);
1182
1183 // Start the main loop block now that we've taken care of the preliminaries.
1184 Builder.SetInsertPoint(LoopBB);
1185 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1186
1187 Value *NewVal = PerformOp(Builder, Loaded);
1188
1189 Value *StoreSuccess =
1190 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1191 Value *TryAgain = Builder.CreateICmpNE(
1192 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1193 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1194
1195 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1196 return Loaded;
1197}
1198
1199/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1200/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1201/// IR. As a migration step, we convert back to what used to be the standard
1202/// way to represent a pointer cmpxchg so that we can update backends one by
1203/// one.
1205AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1206 auto *M = CI->getModule();
1207 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1208 M->getDataLayout());
1209
1210 ReplacementIRBuilder Builder(CI, *DL);
1211
1212 Value *Addr = CI->getPointerOperand();
1213
1214 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1215 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1216
1217 auto *NewCI = Builder.CreateAtomicCmpXchg(
1218 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1219 CI->getFailureOrdering(), CI->getSyncScopeID());
1220 NewCI->setVolatile(CI->isVolatile());
1221 NewCI->setWeak(CI->isWeak());
1222 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1223
1224 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1225 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1226
1227 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1228
1229 Value *Res = PoisonValue::get(CI->getType());
1230 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1231 Res = Builder.CreateInsertValue(Res, Succ, 1);
1232
1233 CI->replaceAllUsesWith(Res);
1234 CI->eraseFromParent();
1235 return NewCI;
1236}
1237
1238bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1239 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1240 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1241 Value *Addr = CI->getPointerOperand();
1242 BasicBlock *BB = CI->getParent();
1243 Function *F = BB->getParent();
1244 LLVMContext &Ctx = F->getContext();
1245 // If shouldInsertFencesForAtomic() returns true, then the target does not
1246 // want to deal with memory orders, and emitLeading/TrailingFence should take
1247 // care of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
1248 // should preserve the ordering.
1249 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1250 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1251 ? AtomicOrdering::Monotonic
1252 : CI->getMergedOrdering();
1253
1254 // In implementations which use a barrier to achieve release semantics, we can
1255 // delay emitting this barrier until we know a store is actually going to be
1256 // attempted. The cost of this delay is that we need 2 copies of the block
1257 // emitting the load-linked, affecting code size.
1258 //
1259 // Ideally, this logic would be unconditional except for the minsize check
1260 // since in other cases the extra blocks naturally collapse down to the
1261 // minimal loop. Unfortunately, this puts too much stress on later
1262 // optimisations so we avoid emitting the extra logic in those cases too.
1263 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1264 SuccessOrder != AtomicOrdering::Monotonic &&
1265 SuccessOrder != AtomicOrdering::Acquire &&
1266 !F->hasMinSize();
1267
1268 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1269 // do it even on minsize.
1270 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1271
1272 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1273 //
1274 // The full expansion we produce is:
1275 // [...]
1276 // %aligned.addr = ...
1277 // cmpxchg.start:
1278 // %unreleasedload = @load.linked(%aligned.addr)
1279 // %unreleasedload.extract = extract value from %unreleasedload
1280 // %should_store = icmp eq %unreleasedload.extract, %desired
1281 // br i1 %should_store, label %cmpxchg.releasingstore,
1282 // label %cmpxchg.nostore
1283 // cmpxchg.releasingstore:
1284 // fence?
1285 // br label cmpxchg.trystore
1286 // cmpxchg.trystore:
1287 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1288 // [%releasedload, %cmpxchg.releasedload]
1289 // %updated.new = insert %new into %loaded.trystore
1290 // %stored = @store_conditional(%updated.new, %aligned.addr)
1291 // %success = icmp eq i32 %stored, 0
1292 // br i1 %success, label %cmpxchg.success,
1293 // label %cmpxchg.releasedload/%cmpxchg.failure
1294 // cmpxchg.releasedload:
1295 // %releasedload = @load.linked(%aligned.addr)
1296 // %releasedload.extract = extract value from %releasedload
1297 // %should_store = icmp eq %releasedload.extract, %desired
1298 // br i1 %should_store, label %cmpxchg.trystore,
1299 // label %cmpxchg.failure
1300 // cmpxchg.success:
1301 // fence?
1302 // br label %cmpxchg.end
1303 // cmpxchg.nostore:
1304 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1305 // [%releasedload,
1306 // %cmpxchg.releasedload/%cmpxchg.trystore]
1307 // @load_linked_fail_balance()?
1308 // br label %cmpxchg.failure
1309 // cmpxchg.failure:
1310 // fence?
1311 // br label %cmpxchg.end
1312 // cmpxchg.end:
1313 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1314 // [%loaded.trystore, %cmpxchg.trystore]
1315 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1316 // %loaded = extract value from %loaded.exit
1317 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1318 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1319 // [...]
1320 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1321 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1322 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1323 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1324 auto ReleasedLoadBB =
1325 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1326 auto TryStoreBB =
1327 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1328 auto ReleasingStoreBB =
1329 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1330 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1331
1332 ReplacementIRBuilder Builder(CI, *DL);
1333
1334 // The split call above "helpfully" added a branch at the end of BB (to the
1335 // wrong place), but we might want a fence too. It's easiest to just remove
1336 // the branch entirely.
1337 std::prev(BB->end())->eraseFromParent();
1338 Builder.SetInsertPoint(BB);
1339 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1340 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1341
1342 PartwordMaskValues PMV =
1343 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1344 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1345 Builder.CreateBr(StartBB);
1346
1347 // Start the main loop block now that we've taken care of the preliminaries.
1348 Builder.SetInsertPoint(StartBB);
1349 Value *UnreleasedLoad =
1350 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1351 Value *UnreleasedLoadExtract =
1352 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1353 Value *ShouldStore = Builder.CreateICmpEQ(
1354 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1355
1356 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1357 // jump straight past that fence instruction (if it exists).
1358 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1359
1360 Builder.SetInsertPoint(ReleasingStoreBB);
1361 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1362 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1363 Builder.CreateBr(TryStoreBB);
1364
1365 Builder.SetInsertPoint(TryStoreBB);
1366 PHINode *LoadedTryStore =
1367 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1368 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1369 Value *NewValueInsert =
1370 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1371 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1372 PMV.AlignedAddr, MemOpOrder);
1373 StoreSuccess = Builder.CreateICmpEQ(
1374 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1375 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1376 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1377 CI->isWeak() ? FailureBB : RetryBB);
1378
1379 Builder.SetInsertPoint(ReleasedLoadBB);
1380 Value *SecondLoad;
1381 if (HasReleasedLoadBB) {
1382 SecondLoad =
1383 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1384 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1385 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1386 CI->getCompareOperand(), "should_store");
1387
1388 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1389 // jump straight past that fence instruction (if it exists).
1390 Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1391 // Update PHI node in TryStoreBB.
1392 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1393 } else
1394 Builder.CreateUnreachable();
1395
1396 // Make sure later instructions don't get reordered with a fence if
1397 // necessary.
1398 Builder.SetInsertPoint(SuccessBB);
1399 if (ShouldInsertFencesForAtomic ||
1400 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1401 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1402 Builder.CreateBr(ExitBB);
1403
1404 Builder.SetInsertPoint(NoStoreBB);
1405 PHINode *LoadedNoStore =
1406 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1407 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1408 if (HasReleasedLoadBB)
1409 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1410
1411 // In the failing case, where we don't execute the store-conditional, the
1412 // target might want to balance out the load-linked with a dedicated
1413 // instruction (e.g., on ARM, clearing the exclusive monitor).
1414 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1415 Builder.CreateBr(FailureBB);
1416
1417 Builder.SetInsertPoint(FailureBB);
1418 PHINode *LoadedFailure =
1419 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1420 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1421 if (CI->isWeak())
1422 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1423 if (ShouldInsertFencesForAtomic)
1424 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1425 Builder.CreateBr(ExitBB);
1426
1427 // Finally, we have control-flow based knowledge of whether the cmpxchg
1428 // succeeded or not. We expose this to later passes by converting any
1429 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1430 // PHI.
1431 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1432 PHINode *LoadedExit =
1433 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1434 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1435 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1436 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1437 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1438 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1439
1440 // This is the "exit value" from the cmpxchg expansion. It may be of
1441 // a type wider than the one in the cmpxchg instruction.
1442 Value *LoadedFull = LoadedExit;
1443
1444 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1445 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1446
1447 // Look for any users of the cmpxchg that are just comparing the loaded value
1448 // against the desired one, and replace them with the CFG-derived version.
1449 SmallVector<ExtractValueInst *, 2> PrunedInsts;
1450 for (auto *User : CI->users()) {
1451 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1452 if (!EV)
1453 continue;
1454
1455 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1456 "weird extraction from { iN, i1 }");
1457
1458 if (EV->getIndices()[0] == 0)
1459 EV->replaceAllUsesWith(Loaded);
1460 else
1461 EV->replaceAllUsesWith(Success);
1462
1463 PrunedInsts.push_back(EV);
1464 }
1465
1466 // We can remove the instructions now we're no longer iterating through them.
1467 for (auto *EV : PrunedInsts)
1468 EV->eraseFromParent();
1469
1470 if (!CI->use_empty()) {
1471 // Some use of the full struct return that we don't understand has happened,
1472 // so we've got to reconstruct it properly.
1473 Value *Res;
1474 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1475 Res = Builder.CreateInsertValue(Res, Success, 1);
1476
1477 CI->replaceAllUsesWith(Res);
1478 }
1479
1480 CI->eraseFromParent();
1481 return true;
1482}
1483
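// An atomicrmw is idempotent when it leaves memory unchanged, e.g.
// "atomicrmw add ptr %p, i32 0" or "atomicrmw and ptr %p, i32 -1". Such
// operations only need to produce the loaded value, so targets may lower them
// to a fenced atomic load via simplifyIdempotentRMW() below.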
1484bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1485 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1486 if (!C)
1487 return false;
1488
1489 AtomicRMWInst::BinOp Op = RMWI->getOperation();
1490 switch (Op) {
1491 case AtomicRMWInst::Add:
1492 case AtomicRMWInst::Sub:
1493 case AtomicRMWInst::Or:
1494 case AtomicRMWInst::Xor:
1495 return C->isZero();
1496 case AtomicRMWInst::And:
1497 return C->isMinusOne();
1498 // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1499 default:
1500 return false;
1501 }
1502}
1503
1504bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1505 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1506 tryExpandAtomicLoad(ResultingLoad);
1507 return true;
1508 }
1509 return false;
1510}
1511
1512Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1513 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1514 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1515 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1516 CreateCmpXchgInstFun CreateCmpXchg) {
1517 LLVMContext &Ctx = Builder.getContext();
1518 BasicBlock *BB = Builder.GetInsertBlock();
1519 Function *F = BB->getParent();
1520
1521 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1522 //
1523 // The standard expansion we produce is:
1524 // [...]
1525 // %init_loaded = load atomic iN* %addr
1526 // br label %loop
1527 // loop:
1528 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1529 // %new = some_op iN %loaded, %incr
1530 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1531 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1532 // %success = extractvalue { iN, i1 } %pair, 1
1533 // br i1 %success, label %atomicrmw.end, label %loop
1534 // atomicrmw.end:
1535 // [...]
1536 BasicBlock *ExitBB =
1537 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1538 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1539
1540 // The split call above "helpfully" added a branch at the end of BB (to the
1541 // wrong place), but we want a load. It's easiest to just remove
1542 // the branch entirely.
1543 std::prev(BB->end())->eraseFromParent();
1544 Builder.SetInsertPoint(BB);
1545 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1546 Builder.CreateBr(LoopBB);
1547
1548 // Start the main loop block now that we've taken care of the preliminaries.
1549 Builder.SetInsertPoint(LoopBB);
1550 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1551 Loaded->addIncoming(InitLoaded, BB);
1552
1553 Value *NewVal = PerformOp(Builder, Loaded);
1554
1555 Value *NewLoaded = nullptr;
1556 Value *Success = nullptr;
1557
1558 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1559 MemOpOrder == AtomicOrdering::Unordered
1560 ? AtomicOrdering::Monotonic
1561 : MemOpOrder,
1562 SSID, Success, NewLoaded);
1563 assert(Success && NewLoaded);
1564
1565 Loaded->addIncoming(NewLoaded, LoopBB);
1566
1567 Builder.CreateCondBr(Success, ExitBB, LoopBB);
1568
1569 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1570 return NewLoaded;
1571}
1572
1573bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1574 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1575 unsigned ValueSize = getAtomicOpSize(CI);
1576
1577 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1578 default:
1579 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1580 case TargetLoweringBase::AtomicExpansionKind::None:
1581 if (ValueSize < MinCASSize)
1582 return expandPartwordCmpXchg(CI);
1583 return false;
1584 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1585 return expandAtomicCmpXchg(CI);
1586 }
1587 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1588 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1589 return true;
1590 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1591 return lowerAtomicCmpXchgInst(CI);
1592 }
1593}
1594
1595// Note: This function is exposed externally by AtomicExpandUtils.h
1596bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1597 CreateCmpXchgInstFun CreateCmpXchg) {
1598 ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
1599 Builder.setIsFPConstrained(
1600 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1601
1602 // FIXME: If FP exceptions are observable, we should force them off for the
1603 // loop for the FP atomics.
1604 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1605 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1606 AI->getOrdering(), AI->getSyncScopeID(),
1607 [&](IRBuilderBase &Builder, Value *Loaded) {
1608 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1609 AI->getValOperand());
1610 },
1611 CreateCmpXchg);
1612
1613 AI->replaceAllUsesWith(Loaded);
1614 AI->eraseFromParent();
1615 return true;
1616}
1617
1618// In order to use one of the sized library calls such as
1619// __atomic_fetch_add_4, the alignment must be sufficient, the size
1620// must be one of the potentially-specialized sizes, and the value
1621// type must actually exist in C on the target (otherwise, the
1622// function wouldn't actually be defined.)
1623static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1624 const DataLayout &DL) {
1625 // TODO: "LargestSize" is an approximation for "largest type that
1626 // you can express in C". It seems to be the case that int128 is
1627 // supported on all 64-bit platforms, otherwise only up to 64-bit
1628 // integers are supported. If we get this wrong, then we'll try to
1629 // call a sized libcall that doesn't actually exist. There should
1630 // really be some more reliable way in LLVM of determining integer
1631 // sizes which are valid in the target's C ABI...
1632 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1633 return Alignment >= Size &&
1634 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1635 Size <= LargestSize;
1636}
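// As an illustration, assuming a target whose largest legal integer type is
// at least 64 bits (so LargestSize == 16 above):
//   canUseSizedAtomicCall(4, Align(4), DL) -> true   (a "_4" libcall can be used)
//   canUseSizedAtomicCall(4, Align(2), DL) -> false  (under-aligned)
//   canUseSizedAtomicCall(3, Align(4), DL) -> false  (3 is not a specialized size)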
1637
1638void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1639 static const RTLIB::Libcall Libcalls[6] = {
1640 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1641 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1642 unsigned Size = getAtomicOpSize(I);
1643
1644 bool expanded = expandAtomicOpToLibcall(
1645 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1646 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1647 if (!expanded)
1648 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
1649}
1650
1651void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1652 static const RTLIB::Libcall Libcalls[6] = {
1653 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1654 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1655 unsigned Size = getAtomicOpSize(I);
1656
1657 bool expanded = expandAtomicOpToLibcall(
1658 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1659 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1660 if (!expanded)
1661 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
1662}
1663
1664void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1665 static const RTLIB::Libcall Libcalls[6] = {
1666 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1667 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1668 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1669 unsigned Size = getAtomicOpSize(I);
1670
1671 bool expanded = expandAtomicOpToLibcall(
1672 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1673 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1674 Libcalls);
1675 if (!expanded)
1676 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
1677}
1678
1679static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1680 static const RTLIB::Libcall LibcallsXchg[6] = {
1681 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1682 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1683 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1684 static const RTLIB::Libcall LibcallsAdd[6] = {
1685 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1686 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1687 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1688 static const RTLIB::Libcall LibcallsSub[6] = {
1689 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1690 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1691 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1692 static const RTLIB::Libcall LibcallsAnd[6] = {
1693 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1694 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1695 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1696 static const RTLIB::Libcall LibcallsOr[6] = {
1697 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1698 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1699 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1700 static const RTLIB::Libcall LibcallsXor[6] = {
1701 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1702 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1703 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1704 static const RTLIB::Libcall LibcallsNand[6] = {
1705 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1706 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1707 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1708
1709 switch (Op) {
1710 case AtomicRMWInst::BAD_BINOP:
1711 llvm_unreachable("Should not have BAD_BINOP.");
1712 case AtomicRMWInst::Xchg:
1713 return ArrayRef(LibcallsXchg);
1714 case AtomicRMWInst::Add:
1715 return ArrayRef(LibcallsAdd);
1716 case AtomicRMWInst::Sub:
1717 return ArrayRef(LibcallsSub);
1718 case AtomicRMWInst::And:
1719 return ArrayRef(LibcallsAnd);
1720 case AtomicRMWInst::Or:
1721 return ArrayRef(LibcallsOr);
1722 case AtomicRMWInst::Xor:
1723 return ArrayRef(LibcallsXor);
1724 case AtomicRMWInst::Nand:
1725 return ArrayRef(LibcallsNand);
1726 case AtomicRMWInst::Max:
1727 case AtomicRMWInst::Min:
1728 case AtomicRMWInst::UMax:
1729 case AtomicRMWInst::UMin:
1730 case AtomicRMWInst::FMax:
1731 case AtomicRMWInst::FMin:
1732 case AtomicRMWInst::FAdd:
1733 case AtomicRMWInst::FSub:
1734 case AtomicRMWInst::UIncWrap:
1735 case AtomicRMWInst::UDecWrap:
1736 // No atomic libcalls are available for max/min/umax/umin.
1737 return {};
1738 }
1739 llvm_unreachable("Unexpected AtomicRMW operation.");
1740}
1741
1742void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1743 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1744
1745 unsigned Size = getAtomicOpSize(I);
1746
1747 bool Success = false;
1748 if (!Libcalls.empty())
1749 Success = expandAtomicOpToLibcall(
1750 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1751 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1752
1753 // The expansion failed: either there were no libcalls at all for
1754 // the operation (min/max), or there were only size-specialized
1755 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1756 // CAS libcall, via a CAS loop, instead.
1757 if (!Success) {
1758 expandAtomicRMWToCmpXchg(
1759 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1760 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1761 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
1762 // Create the CAS instruction normally...
1763 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1764 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1765 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1766 Success = Builder.CreateExtractValue(Pair, 1, "success");
1767 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1768
1769 // ...and then expand the CAS into a libcall.
1770 expandAtomicCASToLibcall(Pair);
1771 });
1772 }
1773}
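// As an illustration, an operation with no fetch_* libcall, e.g.
//   %old = atomicrmw max ptr %p, i32 %v seq_cst
// ends up as a compare-exchange loop in which each generated cmpxchg is in
// turn expanded into a call to one of the __atomic_compare_exchange* libcalls
// by expandAtomicCASToLibcall above.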
1774
1775// A helper routine for the above expandAtomic*ToLibcall functions.
1776//
1777// 'Libcalls' contains an array of enum values for the particular
1778// ATOMIC libcalls to be emitted. All of the other arguments besides
1779// 'I' are extracted from the Instruction subclass by the
1780// caller. Depending on the particular call, some will be null.
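// The tables themselves follow a fixed layout: Libcalls[0] is the generic,
// explicitly-sized variant (or UNKNOWN_LIBCALL when no generic exists, as for
// the fetch_* operations), and Libcalls[1] through Libcalls[5] are the sized
// variants for 1, 2, 4, 8 and 16 bytes respectively, as selected by the size
// switch below.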
1781bool AtomicExpandImpl::expandAtomicOpToLibcall(
1782 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1783 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1784 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1785 assert(Libcalls.size() == 6);
1786
1787 LLVMContext &Ctx = I->getContext();
1788 Module *M = I->getModule();
1789 const DataLayout &DL = M->getDataLayout();
1790 IRBuilder<> Builder(I);
1791 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1792
1793 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1794 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1795
1796 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1797
1798 // TODO: the "order" argument type is "int", not int32. So
1799 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1800 ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1801 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1802 Constant *OrderingVal =
1803 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1804 Constant *Ordering2Val = nullptr;
1805 if (CASExpected) {
1806 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1807 Ordering2Val =
1808 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1809 }
1810 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1811
1812 RTLIB::Libcall RTLibType;
1813 if (UseSizedLibcall) {
1814 switch (Size) {
1815 case 1:
1816 RTLibType = Libcalls[1];
1817 break;
1818 case 2:
1819 RTLibType = Libcalls[2];
1820 break;
1821 case 4:
1822 RTLibType = Libcalls[3];
1823 break;
1824 case 8:
1825 RTLibType = Libcalls[4];
1826 break;
1827 case 16:
1828 RTLibType = Libcalls[5];
1829 break;
1830 }
1831 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1832 RTLibType = Libcalls[0];
1833 } else {
1834 // Can't use sized function, and there's no generic for this
1835 // operation, so give up.
1836 return false;
1837 }
1838
1839 if (!TLI->getLibcallName(RTLibType)) {
1840 // This target does not implement the requested atomic libcall so give up.
1841 return false;
1842 }
1843
1844 // Build up the function call. There are two kinds. First, the sized
1845 // variants. These calls are going to be one of the following (with
1846 // N=1,2,4,8,16):
1847 // iN __atomic_load_N(iN *ptr, int ordering)
1848 // void __atomic_store_N(iN *ptr, iN val, int ordering)
1849 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1850 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1851 // int success_order, int failure_order)
1852 //
1853 // Note that these functions can be used for non-integer atomic
1854 // operations; the values just need to be bitcast to integers on the
1855 // way in and out.
1856 //
1857 // And, then, the generic variants. They look like the following:
1858 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1859 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1860 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1861 // int ordering)
1862 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1863 // void *desired, int success_order,
1864 // int failure_order)
1865 //
1866 // The different signatures are built up depending on the
1867 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1868 // variables.
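// As an illustration (the exact IR depends on the target and data layout), a
// naturally aligned 4-byte operation such as
//   %old = atomicrmw xchg ptr %p, i32 %v seq_cst
// would be expected to take the sized path and become roughly
//   %old = call i32 @__atomic_exchange_4(ptr %p, i32 %v, i32 5)
// where 5 is the C ABI encoding of seq_cst, while an under-aligned or
// oddly-sized operation takes the generic path, passing the value and
// receiving the result through stack temporaries.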
1869
1870 AllocaInst *AllocaCASExpected = nullptr;
1871 AllocaInst *AllocaValue = nullptr;
1872 AllocaInst *AllocaResult = nullptr;
1873
1874 Type *ResultTy;
1875 SmallVector<Value *, 6> Args;
1876 AttributeList Attr;
1877
1878 // 'size' argument.
1879 if (!UseSizedLibcall) {
1880 // Note, getIntPtrType is assumed equivalent to size_t.
1881 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1882 }
1883
1884 // 'ptr' argument.
1885 // note: This assumes all address spaces share a common libfunc
1886 // implementation and that addresses are convertible. For systems without
1887 // that property, we'd need to extend this mechanism to support AS-specific
1888 // families of atomic intrinsics.
1889 Value *PtrVal = PointerOperand;
1890 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
1891 Args.push_back(PtrVal);
1892
1893 // 'expected' argument, if present.
1894 if (CASExpected) {
1895 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1896 AllocaCASExpected->setAlignment(AllocaAlignment);
1897 Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
1898 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1899 Args.push_back(AllocaCASExpected);
1900 }
1901
1902 // 'val' argument ('desired' for cas), if present.
1903 if (ValueOperand) {
1904 if (UseSizedLibcall) {
1905 Value *IntValue =
1906 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1907 Args.push_back(IntValue);
1908 } else {
1909 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1910 AllocaValue->setAlignment(AllocaAlignment);
1911 Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
1912 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1913 Args.push_back(AllocaValue);
1914 }
1915 }
1916
1917 // 'ret' argument.
1918 if (!CASExpected && HasResult && !UseSizedLibcall) {
1919 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1920 AllocaResult->setAlignment(AllocaAlignment);
1921 Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
1922 Args.push_back(AllocaResult);
1923 }
1924
1925 // 'ordering' ('success_order' for cas) argument.
1926 Args.push_back(OrderingVal);
1927
1928 // 'failure_order' argument, if present.
1929 if (Ordering2Val)
1930 Args.push_back(Ordering2Val);
1931
1932 // Now, the return type.
1933 if (CASExpected) {
1934 ResultTy = Type::getInt1Ty(Ctx);
1935 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
1936 } else if (HasResult && UseSizedLibcall)
1937 ResultTy = SizedIntTy;
1938 else
1939 ResultTy = Type::getVoidTy(Ctx);
1940
1941 // Done with setting up arguments and return types, create the call:
1942 SmallVector<Type *, 6> ArgTys;
1943 for (Value *Arg : Args)
1944 ArgTys.push_back(Arg->getType());
1945 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1946 FunctionCallee LibcallFn =
1947 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1948 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1949 Call->setAttributes(Attr);
1950 Value *Result = Call;
1951
1952 // And then, extract the results...
1953 if (ValueOperand && !UseSizedLibcall)
1954 Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);
1955
1956 if (CASExpected) {
1957 // The final result from the CAS is {load of 'expected' alloca, bool result
1958 // from call}
1959 Type *FinalResultTy = I->getType();
1960 Value *V = PoisonValue::get(FinalResultTy);
1961 Value *ExpectedOut = Builder.CreateAlignedLoad(
1962 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
1963 Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
1964 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1965 V = Builder.CreateInsertValue(V, Result, 1);
1966 I->replaceAllUsesWith(V);
1967 } else if (HasResult) {
1968 Value *V;
1969 if (UseSizedLibcall)
1970 V = Builder.CreateBitOrPointerCast(Result, I->getType());
1971 else {
1972 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
1973 AllocaAlignment);
1974 Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
1975 }
1976 I->replaceAllUsesWith(V);
1977 }
1978 I->eraseFromParent();
1979 return true;
1980}