1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target-specific instructions which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/Instruction.h"
38#include "llvm/IR/MDBuilder.h"
40#include "llvm/IR/Module.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
46#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const DataLayout *DL = nullptr;
67
68private:
69 void handleFailure(Instruction &FailedInst, const Twine &Msg) const {
70 LLVMContext &Ctx = FailedInst.getContext();
71
72 // TODO: Do not use generic error type.
73 Ctx.emitError(&FailedInst, Msg);
74
75 if (!FailedInst.getType()->isVoidTy())
76 FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
77 FailedInst.eraseFromParent();
78 }
79
80 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
81 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
82 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
83 bool tryExpandAtomicLoad(LoadInst *LI);
84 bool expandAtomicLoadToLL(LoadInst *LI);
85 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
86 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
87 bool tryExpandAtomicStore(StoreInst *SI);
88 void expandAtomicStoreToXChg(StoreInst *SI);
89 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
90 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
91 Value *
92 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
93 Align AddrAlign, AtomicOrdering MemOpOrder,
94 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
95 void expandAtomicOpToLLSC(
96 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
97 AtomicOrdering MemOpOrder,
98 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
99 void expandPartwordAtomicRMW(
100 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
101 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
102 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
103 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
104 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
105
106 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
107 static Value *insertRMWCmpXchgLoop(
108 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
109 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
110 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
111 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
112 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
113
114 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
115 bool isIdempotentRMW(AtomicRMWInst *RMWI);
116 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
117
118 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
119 Value *PointerOperand, Value *ValueOperand,
120 Value *CASExpected, AtomicOrdering Ordering,
121 AtomicOrdering Ordering2,
122 ArrayRef<RTLIB::Libcall> Libcalls);
123 void expandAtomicLoadToLibcall(LoadInst *LI);
124 void expandAtomicStoreToLibcall(StoreInst *LI);
125 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
126 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
127
128 friend bool
129 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
130 CreateCmpXchgInstFun CreateCmpXchg);
131
132 bool processAtomicInstr(Instruction *I);
133
134public:
135 bool run(Function &F, const TargetMachine *TM);
136};
137
138class AtomicExpandLegacy : public FunctionPass {
139public:
140 static char ID; // Pass identification, replacement for typeid
141
142 AtomicExpandLegacy() : FunctionPass(ID) {
143 initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
144 }
145
146 bool runOnFunction(Function &F) override;
147};
148
149// IRBuilder to be used for replacement atomic instructions.
150struct ReplacementIRBuilder
151 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
152 MDNode *MMRAMD = nullptr;
153
154 // Preserves the DebugLoc from I, and preserves still valid metadata.
155 // Enable StrictFP builder mode when appropriate.
156 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
157 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
158 IRBuilderCallbackInserter(
159 [this](Instruction *I) { addMMRAMD(I); })) {
160 SetInsertPoint(I);
161 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
162 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
163 this->setIsFPConstrained(true);
164
165 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
166 }
167
168 void addMMRAMD(Instruction *I) {
169 if (canInstructionHaveMMRAs(*I))
170 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
171 }
172};
173
174} // end anonymous namespace
175
176char AtomicExpandLegacy::ID = 0;
177
178char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
179
180INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
181 "Expand Atomic instructions", false, false)
182INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
183INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
184 "Expand Atomic instructions", false, false)
185
186// Helper functions to retrieve the size of atomic instructions.
187static unsigned getAtomicOpSize(LoadInst *LI) {
188 const DataLayout &DL = LI->getDataLayout();
189 return DL.getTypeStoreSize(LI->getType());
190}
191
192static unsigned getAtomicOpSize(StoreInst *SI) {
193 const DataLayout &DL = SI->getDataLayout();
194 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
195}
196
197static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
198 const DataLayout &DL = RMWI->getDataLayout();
199 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
200}
201
202static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
203 const DataLayout &DL = CASI->getDataLayout();
204 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
205}
206
207/// Copy metadata that's safe to preserve when widening atomics.
208static void copyMetadataForAtomic(Instruction &Dest,
209 const Instruction &Source) {
210 SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
211 Source.getAllMetadata(MD);
212 LLVMContext &Ctx = Dest.getContext();
213 MDBuilder MDB(Ctx);
214
215 for (auto [ID, N] : MD) {
216 switch (ID) {
217 case LLVMContext::MD_dbg:
218 case LLVMContext::MD_tbaa:
219 case LLVMContext::MD_tbaa_struct:
220 case LLVMContext::MD_alias_scope:
221 case LLVMContext::MD_noalias:
222 case LLVMContext::MD_noalias_addrspace:
223 case LLVMContext::MD_access_group:
224 case LLVMContext::MD_mmra:
225 Dest.setMetadata(ID, N);
226 break;
227 default:
228 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
229 Dest.setMetadata(ID, N);
230 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
231 Dest.setMetadata(ID, N);
232
233 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
234 // uses.
235 break;
236 }
237 }
238}
239
240// Determine if a particular atomic operation has a supported size,
241// and is of appropriate alignment, to be passed through for target
242// lowering. (Versus turning into a __atomic libcall)
243template <typename Inst>
244static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
245 unsigned Size = getAtomicOpSize(I);
246 Align Alignment = I->getAlign();
247 return Alignment >= Size &&
248 Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
249}
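// Illustrative example (values are hypothetical; the thresholds come from the
// target's TargetLowering): on a target that reports a 64-bit maximum atomic
// size, an
//   %v = load atomic i128, ptr %p seq_cst, align 16
// fails the size check, and an under-aligned
//   %v = load atomic i32, ptr %p seq_cst, align 2
// fails the "alignment >= size" check; both are therefore routed to the
// __atomic_* libcall path by processAtomicInstr below.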
250
251bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
252 auto *LI = dyn_cast<LoadInst>(I);
253 auto *SI = dyn_cast<StoreInst>(I);
254 auto *RMWI = dyn_cast<AtomicRMWInst>(I);
255 auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);
256
257 bool MadeChange = false;
258
259 // If the Size/Alignment is not supported, replace with a libcall.
260 if (LI) {
261 if (!LI->isAtomic())
262 return false;
263
264 if (!atomicSizeSupported(TLI, LI)) {
265 expandAtomicLoadToLibcall(LI);
266 return true;
267 }
268
269 if (TLI->shouldCastAtomicLoadInIR(LI) ==
270 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
271 I = LI = convertAtomicLoadToIntegerType(LI);
272 MadeChange = true;
273 }
274 } else if (SI) {
275 if (!SI->isAtomic())
276 return false;
277
278 if (!atomicSizeSupported(TLI, SI)) {
279 expandAtomicStoreToLibcall(SI);
280 return true;
281 }
282
283 if (TLI->shouldCastAtomicStoreInIR(SI) ==
284 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
285 I = SI = convertAtomicStoreToIntegerType(SI);
286 MadeChange = true;
287 }
288 } else if (RMWI) {
289 if (!atomicSizeSupported(TLI, RMWI)) {
290 expandAtomicRMWToLibcall(RMWI);
291 return true;
292 }
293
294 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
295 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
296 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
297 MadeChange = true;
298 }
299 } else if (CASI) {
300 if (!atomicSizeSupported(TLI, CASI)) {
301 expandAtomicCASToLibcall(CASI);
302 return true;
303 }
304
305 // TODO: when we're ready to make the change at the IR level, we can
306 // extend convertCmpXchgToInteger for floating point too.
307 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
308 // TODO: add a TLI hook to control this so that each target can
309 // convert to lowering the original type one at a time.
310 I = CASI = convertCmpXchgToIntegerType(CASI);
311 MadeChange = true;
312 }
313 } else
314 return false;
315
316 if (TLI->shouldInsertFencesForAtomic(I)) {
317 auto FenceOrdering = AtomicOrdering::Monotonic;
318 if (LI && isAcquireOrStronger(LI->getOrdering())) {
319 FenceOrdering = LI->getOrdering();
320 LI->setOrdering(AtomicOrdering::Monotonic);
321 } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
322 FenceOrdering = SI->getOrdering();
323 SI->setOrdering(AtomicOrdering::Monotonic);
324 } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
325 isAcquireOrStronger(RMWI->getOrdering()))) {
326 FenceOrdering = RMWI->getOrdering();
327 RMWI->setOrdering(AtomicOrdering::Monotonic);
328 } else if (CASI &&
329 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
330 TargetLoweringBase::AtomicExpansionKind::None &&
331 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
332 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
333 isAcquireOrStronger(CASI->getFailureOrdering()))) {
334 // If a compare and swap is lowered to LL/SC, we can do smarter fence
335 // insertion, with a stronger one on the success path than on the
336 // failure path. As a result, fence insertion is directly done by
337 // expandAtomicCmpXchg in that case.
338 FenceOrdering = CASI->getMergedOrdering();
339 auto CASOrdering = TLI->atomicOperationOrderAfterFenceSplit(CASI);
340
341 CASI->setSuccessOrdering(CASOrdering);
342 CASI->setFailureOrdering(CASOrdering);
343 }
344
345 if (FenceOrdering != AtomicOrdering::Monotonic) {
346 MadeChange |= bracketInstWithFences(I, FenceOrdering);
347 }
348 } else if (TLI->shouldInsertTrailingFenceForAtomicStore(I) &&
349 !(CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
350 TargetLoweringBase::AtomicExpansionKind::LLSC)) {
351 // CmpXchg LLSC is handled in expandAtomicCmpXchg().
352 IRBuilder Builder(I);
353 if (auto TrailingFence = TLI->emitTrailingFence(
354 Builder, I, AtomicOrdering::SequentiallyConsistent)) {
355 TrailingFence->moveAfter(I);
356 MadeChange = true;
357 }
358 }
359
360 if (LI)
361 MadeChange |= tryExpandAtomicLoad(LI);
362 else if (SI)
363 MadeChange |= tryExpandAtomicStore(SI);
364 else if (RMWI) {
365 // There are two different ways of expanding RMW instructions:
366 // - into a load if it is idempotent
367 // - into a Cmpxchg/LL-SC loop otherwise
368 // we try them in that order.
369
370 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
371 MadeChange = true;
372
373 } else {
374 MadeChange |= tryExpandAtomicRMW(RMWI);
375 }
376 } else if (CASI)
377 MadeChange |= tryExpandAtomicCmpXchg(CASI);
378
379 return MadeChange;
380}
381
382bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
383 const auto *Subtarget = TM->getSubtargetImpl(F);
384 if (!Subtarget->enableAtomicExpand())
385 return false;
386 TLI = Subtarget->getTargetLowering();
387 DL = &F.getDataLayout();
388
389 bool MadeChange = false;
390
391 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
392 BasicBlock *BB = &*BBI;
393
394 BasicBlock::reverse_iterator Next;
395
396 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
397 I = Next) {
398 Instruction &Inst = *I;
399 Next = std::next(I);
400
401 if (processAtomicInstr(&Inst)) {
402 MadeChange = true;
403
404 // New blocks may have been inserted.
405 BBE = F.end();
406 }
407 }
408 }
409
410 return MadeChange;
411}
412
413bool AtomicExpandLegacy::runOnFunction(Function &F) {
414
415 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
416 if (!TPC)
417 return false;
418 auto *TM = &TPC->getTM<TargetMachine>();
419 AtomicExpandImpl AE;
420 return AE.run(F, TM);
421}
422
423FunctionPass *llvm::createAtomicExpandLegacyPass() {
424 return new AtomicExpandLegacy();
425}
426
427PreservedAnalyses AtomicExpandPass::run(Function &F,
428 FunctionAnalysisManager &AM) {
429 AtomicExpandImpl AE;
430
431 bool Changed = AE.run(F, TM);
432 if (!Changed)
433 return PreservedAnalyses::all();
434
435 return PreservedAnalyses::none();
436}
437
438bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
439 AtomicOrdering Order) {
440 ReplacementIRBuilder Builder(I, *DL);
441
442 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
443
444 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
445 // We have a guard here because not every atomic operation generates a
446 // trailing fence.
447 if (TrailingFence)
448 TrailingFence->moveAfter(I);
449
450 return (LeadingFence || TrailingFence);
451}
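// Rough sketch of the effect (illustrative only; the actual fences are
// whatever the target's emitLeadingFence/emitTrailingFence hooks produce):
// on a fence-inserting target, processAtomicInstr rewrites
//   store atomic i32 %v, ptr %p release, align 4
// into approximately
//   fence release
//   store atomic i32 %v, ptr %p monotonic, align 4
// after first downgrading the store's ordering to monotonic.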
452
453/// Get the iX type with the same bitwidth as T.
454IntegerType *
455AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
456 EVT VT = TLI->getMemValueType(DL, T);
457 unsigned BitWidth = VT.getStoreSizeInBits();
458 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
459 return IntegerType::get(T->getContext(), BitWidth);
460}
461
462/// Convert an atomic load of a non-integral type to an integer load of the
463/// equivalent bitwidth. See the function comment on
464/// convertAtomicStoreToIntegerType for background.
465LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
466 auto *M = LI->getModule();
467 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
468
469 ReplacementIRBuilder Builder(LI, *DL);
470
471 Value *Addr = LI->getPointerOperand();
472
473 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
474 NewLI->setAlignment(LI->getAlign());
475 NewLI->setVolatile(LI->isVolatile());
476 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
477 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
478
479 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
480 LI->replaceAllUsesWith(NewVal);
481 LI->eraseFromParent();
482 return NewLI;
483}
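// For example (illustrative value names):
//   %f = load atomic float, ptr %p acquire, align 4
// becomes
//   %i = load atomic i32, ptr %p acquire, align 4
//   %f = bitcast i32 %i to float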
484
485AtomicRMWInst *
486AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
488
489 auto *M = RMWI->getModule();
490 Type *NewTy =
491 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
492
493 ReplacementIRBuilder Builder(RMWI, *DL);
494
495 Value *Addr = RMWI->getPointerOperand();
496 Value *Val = RMWI->getValOperand();
497 Value *NewVal = Val->getType()->isPointerTy()
498 ? Builder.CreatePtrToInt(Val, NewTy)
499 : Builder.CreateBitCast(Val, NewTy);
500
501 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
502 RMWI->getAlign(), RMWI->getOrdering(),
503 RMWI->getSyncScopeID());
504 NewRMWI->setVolatile(RMWI->isVolatile());
505 copyMetadataForAtomic(*NewRMWI, *RMWI);
506 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
507
508 Value *NewRVal = RMWI->getType()->isPointerTy()
509 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
510 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
511 RMWI->replaceAllUsesWith(NewRVal);
512 RMWI->eraseFromParent();
513 return NewRMWI;
514}
515
516bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
517 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
518 case TargetLoweringBase::AtomicExpansionKind::None:
519 return false;
520 case TargetLoweringBase::AtomicExpansionKind::LLSC:
521 expandAtomicOpToLLSC(
522 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
523 LI->getOrdering(),
524 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
525 return true;
526 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
527 return expandAtomicLoadToLL(LI);
528 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
529 return expandAtomicLoadToCmpXchg(LI);
530 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
531 LI->setAtomic(AtomicOrdering::NotAtomic);
532 return true;
533 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
534 TLI->emitExpandAtomicLoad(LI);
535 return true;
536 default:
537 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
538 }
539}
540
541bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
542 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
543 case TargetLoweringBase::AtomicExpansionKind::None:
544 return false;
545 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
546 TLI->emitExpandAtomicStore(SI);
547 return true;
548 case TargetLoweringBase::AtomicExpansionKind::Expand:
549 expandAtomicStoreToXChg(SI);
550 return true;
551 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
552 SI->setAtomic(AtomicOrdering::NotAtomic);
553 return true;
554 default:
555 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
556 }
557}
558
559bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
560 ReplacementIRBuilder Builder(LI, *DL);
561
562 // On some architectures, load-linked instructions are atomic for larger
563 // sizes than normal loads. For example, the only 64-bit load guaranteed
564 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
565 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
566 LI->getPointerOperand(), LI->getOrdering());
567 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
568
569 LI->replaceAllUsesWith(Val);
570 LI->eraseFromParent();
571
572 return true;
573}
574
575bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
576 ReplacementIRBuilder Builder(LI, *DL);
577 AtomicOrdering Order = LI->getOrdering();
578 if (Order == AtomicOrdering::Unordered)
579 Order = AtomicOrdering::Monotonic;
580
581 Value *Addr = LI->getPointerOperand();
582 Type *Ty = LI->getType();
583 Constant *DummyVal = Constant::getNullValue(Ty);
584
585 Value *Pair = Builder.CreateAtomicCmpXchg(
586 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
587 AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
588 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
589
590 LI->replaceAllUsesWith(Loaded);
591 LI->eraseFromParent();
592
593 return true;
594}
595
596/// Convert an atomic store of a non-integral type to an integer store of the
597/// equivalent bitwidth. We used to not support floating point or vector
598/// atomics in the IR at all. The backends learned to deal with the bitcast
599/// idiom because that was the only way of expressing the notion of an atomic
600/// float or vector store. The long term plan is to teach each backend to
601/// instruction select from the original atomic store, but as a migration
602/// mechanism, we convert back to the old format which the backends understand.
603/// Each backend will need individual work to recognize the new format.
604StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
605 ReplacementIRBuilder Builder(SI, *DL);
606 auto *M = SI->getModule();
607 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
608 M->getDataLayout());
609 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
610
611 Value *Addr = SI->getPointerOperand();
612
613 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
614 NewSI->setAlignment(SI->getAlign());
615 NewSI->setVolatile(SI->isVolatile());
616 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
617 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
618 SI->eraseFromParent();
619 return NewSI;
620}
621
622void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
623 // This function is only called on atomic stores that are too large to be
624 // atomic if implemented as a native store. So we replace them by an
625 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
626 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
627 // It is the responsibility of the target to only signal expansion via
628 // shouldExpandAtomicRMW in cases where this is required and possible.
629 ReplacementIRBuilder Builder(SI, *DL);
630 AtomicOrdering Ordering = SI->getOrdering();
631 assert(Ordering != AtomicOrdering::NotAtomic);
632 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
633 ? AtomicOrdering::Monotonic
634 : Ordering;
635 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
636 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
637 SI->getAlign(), RMWOrdering);
638 SI->eraseFromParent();
639
640 // Now we have an appropriate swap instruction, lower it as usual.
641 tryExpandAtomicRMW(AI);
642}
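// Illustrative shape of the expansion (the atomicrmw produced here is then
// lowered again by tryExpandAtomicRMW, so the final code is target specific):
//   store atomic i64 %v, ptr %p seq_cst, align 8
// becomes
//   %old = atomicrmw xchg ptr %p, i64 %v seq_cst, align 8
// with %old left unused.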
643
644static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
645 Value *Loaded, Value *NewVal, Align AddrAlign,
646 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
647 Value *&Success, Value *&NewLoaded,
648 Instruction *MetadataSrc) {
649 Type *OrigTy = NewVal->getType();
650
651 // This code can go away when cmpxchg supports FP and vector types.
652 assert(!OrigTy->isPointerTy());
653 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
654 if (NeedBitcast) {
655 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
656 NewVal = Builder.CreateBitCast(NewVal, IntTy);
657 Loaded = Builder.CreateBitCast(Loaded, IntTy);
658 }
659
660 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
661 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
662 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
663 if (MetadataSrc)
664 copyMetadataForAtomic(*Pair, *MetadataSrc);
665
666 Success = Builder.CreateExtractValue(Pair, 1, "success");
667 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
668
669 if (NeedBitcast)
670 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
671}
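// Sketch of what this helper emits for a float value (illustrative value
// names; the bitcasts disappear for plain integer types):
//   %exp.i     = bitcast float %loaded to i32
//   %new.i     = bitcast float %newval to i32
//   %pair      = cmpxchg ptr %addr, i32 %exp.i, i32 %new.i <order> <failure order>
//   %success   = extractvalue { i32, i1 } %pair, 1
//   %newload.i = extractvalue { i32, i1 } %pair, 0
//   %newloaded = bitcast i32 %newload.i to float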
672
673bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
674 LLVMContext &Ctx = AI->getModule()->getContext();
675 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
676 switch (Kind) {
677 case TargetLoweringBase::AtomicExpansionKind::None:
678 return false;
679 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
680 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
681 unsigned ValueSize = getAtomicOpSize(AI);
682 if (ValueSize < MinCASSize) {
683 expandPartwordAtomicRMW(AI,
684 TargetLoweringBase::AtomicExpansionKind::LLSC);
685 } else {
686 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
687 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
688 AI->getValOperand());
689 };
690 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
691 AI->getAlign(), AI->getOrdering(), PerformOp);
692 }
693 return true;
694 }
695 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
696 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
697 unsigned ValueSize = getAtomicOpSize(AI);
698 if (ValueSize < MinCASSize) {
699 expandPartwordAtomicRMW(AI,
700 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
701 } else {
702 SmallVector<StringRef> SSNs;
703 Ctx.getSyncScopeNames(SSNs);
704 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
705 ? "system"
706 : SSNs[AI->getSyncScopeID()];
707 OptimizationRemarkEmitter ORE(AI->getFunction());
708 ORE.emit([&]() {
709 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
710 << "A compare and swap loop was generated for an atomic "
711 << AI->getOperationName(AI->getOperation()) << " operation at "
712 << MemScope << " memory scope";
713 });
714 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
715 }
716 return true;
717 }
718 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
719 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
720 unsigned ValueSize = getAtomicOpSize(AI);
721 if (ValueSize < MinCASSize) {
722 AtomicRMWInst::BinOp Op = AI->getOperation();
723 // Widen And/Or/Xor and give the target another chance at expanding it.
724 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
725 Op == AtomicRMWInst::And) {
726 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
727 return true;
728 }
729 }
730 expandAtomicRMWToMaskedIntrinsic(AI);
731 return true;
732 }
733 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
734 TLI->emitBitTestAtomicRMWIntrinsic(AI);
735 return true;
736 }
737 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
738 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
739 return true;
740 }
741 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
742 return lowerAtomicRMWInst(AI);
743 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
744 TLI->emitExpandAtomicRMW(AI);
745 return true;
746 default:
747 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
748 }
749}
750
751namespace {
752
753struct PartwordMaskValues {
754 // These three fields are guaranteed to be set by createMaskInstrs.
755 Type *WordType = nullptr;
756 Type *ValueType = nullptr;
757 Type *IntValueType = nullptr;
758 Value *AlignedAddr = nullptr;
759 Align AlignedAddrAlignment;
760 // The remaining fields can be null.
761 Value *ShiftAmt = nullptr;
762 Value *Mask = nullptr;
763 Value *Inv_Mask = nullptr;
764};
765
766[[maybe_unused]]
767raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
768 auto PrintObj = [&O](auto *V) {
769 if (V)
770 O << *V;
771 else
772 O << "nullptr";
773 O << '\n';
774 };
775 O << "PartwordMaskValues {\n";
776 O << " WordType: ";
777 PrintObj(PMV.WordType);
778 O << " ValueType: ";
779 PrintObj(PMV.ValueType);
780 O << " AlignedAddr: ";
781 PrintObj(PMV.AlignedAddr);
782 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
783 O << " ShiftAmt: ";
784 PrintObj(PMV.ShiftAmt);
785 O << " Mask: ";
786 PrintObj(PMV.Mask);
787 O << " Inv_Mask: ";
788 PrintObj(PMV.Inv_Mask);
789 O << "}\n";
790 return O;
791}
792
793} // end anonymous namespace
794
795/// This is a helper function which builds instructions to provide
796/// values necessary for partword atomic operations. It takes an
797/// incoming address, Addr, and ValueType, and constructs the address,
798/// shift-amounts and masks needed to work with a larger value of size
799/// WordSize.
800///
801/// AlignedAddr: Addr rounded down to a multiple of WordSize
802///
803/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
804/// from AlignAddr for it to have the same value as if
805/// ValueType was loaded from Addr.
806///
807/// Mask: Value to mask with the value loaded from AlignAddr to
808/// include only the part that would've been loaded from Addr.
809///
810/// Inv_Mask: The inverse of Mask.
811static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
812 Instruction *I, Type *ValueType,
813 Value *Addr, Align AddrAlign,
814 unsigned MinWordSize) {
815 PartwordMaskValues PMV;
816
817 Module *M = I->getModule();
818 LLVMContext &Ctx = M->getContext();
819 const DataLayout &DL = M->getDataLayout();
820 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
821
822 PMV.ValueType = PMV.IntValueType = ValueType;
823 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
824 PMV.IntValueType =
825 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
826
827 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
828 : ValueType;
829 if (PMV.ValueType == PMV.WordType) {
830 PMV.AlignedAddr = Addr;
831 PMV.AlignedAddrAlignment = AddrAlign;
832 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
833 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
834 return PMV;
835 }
836
837 PMV.AlignedAddrAlignment = Align(MinWordSize);
838
839 assert(ValueSize < MinWordSize);
840
841 PointerType *PtrTy = cast<PointerType>(Addr->getType());
842 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
843 Value *PtrLSB;
844
845 if (AddrAlign < MinWordSize) {
846 PMV.AlignedAddr = Builder.CreateIntrinsic(
847 Intrinsic::ptrmask, {PtrTy, IntTy},
848 {Addr, ConstantInt::getSigned(IntTy, ~(uint64_t)(MinWordSize - 1))},
849 nullptr, "AlignedAddr");
850
851 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
852 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
853 } else {
854 // If the alignment is high enough, the LSB are known 0.
855 PMV.AlignedAddr = Addr;
856 PtrLSB = ConstantInt::getNullValue(IntTy);
857 }
858
859 if (DL.isLittleEndian()) {
860 // turn bytes into bits
861 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
862 } else {
863 // turn bytes into bits, and count from the other side.
864 PMV.ShiftAmt = Builder.CreateShl(
865 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
866 }
867
868 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
869 PMV.Mask = Builder.CreateShl(
870 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
871 "Mask");
872
873 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
874
875 return PMV;
876}
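// Worked example (hypothetical little-endian target with a 32-bit minimum
// cmpxchg width): for an i8 whose address has low bits 0b01,
//   AlignedAddr = llvm.ptrmask(Addr, ~3)
//   PtrLSB      = 1
//   ShiftAmt    = 8            ; PtrLSB * 8
//   Mask        = 0x0000FF00   ; 0xFF << ShiftAmt
//   Inv_Mask    = 0xFFFF00FF
// On a big-endian target the shift counts from the other end, so the same
// byte gets ShiftAmt = (1 ^ 3) * 8 = 16 and Mask = 0x00FF0000.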
877
878static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
879 const PartwordMaskValues &PMV) {
880 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
881 if (PMV.WordType == PMV.ValueType)
882 return WideWord;
883
884 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
885 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
886 return Builder.CreateBitCast(Trunc, PMV.ValueType);
887}
888
889static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
890 Value *Updated, const PartwordMaskValues &PMV) {
891 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
892 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
893 if (PMV.WordType == PMV.ValueType)
894 return Updated;
895
896 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
897
898 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
899 Value *Shift =
900 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
901 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
902 Value *Or = Builder.CreateOr(And, Shift, "inserted");
903 return Or;
904}
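// Continuing the example above (ShiftAmt = 8, Mask = 0xFF00): extraction is
//   trunc(lshr(WideWord, 8)) to i8
// and insertion of a new byte %b is
//   (WideWord & 0xFFFF00FF) | (zext(%b) << 8)
// so only the masked byte of the containing word is ever changed.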
905
906/// Emit IR to implement a masked version of a given atomicrmw
907/// operation. (That is, only the bits under the Mask should be
908/// affected by the operation)
909static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
910 IRBuilderBase &Builder, Value *Loaded,
911 Value *Shifted_Inc, Value *Inc,
912 const PartwordMaskValues &PMV) {
913 // TODO: update to use
914 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
915 // to merge bits from two values without requiring PMV.Inv_Mask.
916 switch (Op) {
917 case AtomicRMWInst::Xchg: {
918 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
919 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
920 return FinalVal;
921 }
922 case AtomicRMWInst::Or:
923 case AtomicRMWInst::Xor:
924 case AtomicRMWInst::And:
925 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
926 case AtomicRMWInst::Add:
927 case AtomicRMWInst::Sub:
928 case AtomicRMWInst::Nand: {
929 // The other arithmetic ops need to be masked into place.
930 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
931 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
932 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
933 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
934 return FinalVal;
935 }
936 case AtomicRMWInst::Max:
937 case AtomicRMWInst::Min:
938 case AtomicRMWInst::UMax:
939 case AtomicRMWInst::UMin:
940 case AtomicRMWInst::FAdd:
941 case AtomicRMWInst::FSub:
942 case AtomicRMWInst::FMin:
943 case AtomicRMWInst::FMax:
944 case AtomicRMWInst::FMaximum:
945 case AtomicRMWInst::FMinimum:
946 case AtomicRMWInst::UIncWrap:
947 case AtomicRMWInst::UDecWrap:
948 case AtomicRMWInst::USubCond:
949 case AtomicRMWInst::USubSat: {
950 // Finally, other ops will operate on the full value, so truncate down to
951 // the original size, and expand out again after doing the
952 // operation. Bitcasts will be inserted for FP values.
953 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
954 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
955 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
956 return FinalVal;
957 }
958 default:
959 llvm_unreachable("Unknown atomic op");
960 }
961}
962
963/// Expand a sub-word atomicrmw operation into an appropriate
964/// word-sized operation.
965///
966/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
967/// way as a typical atomicrmw expansion. The only difference here is
968/// that the operation inside of the loop may operate upon only a
969/// part of the value.
970void AtomicExpandImpl::expandPartwordAtomicRMW(
971 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
972 // Widen And/Or/Xor and give the target another chance at expanding it.
973 AtomicRMWInst::BinOp Op = AI->getOperation();
974 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
975 Op == AtomicRMWInst::And) {
976 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
977 return;
978 }
979 AtomicOrdering MemOpOrder = AI->getOrdering();
980 SyncScope::ID SSID = AI->getSyncScopeID();
981
982 ReplacementIRBuilder Builder(AI, *DL);
983
984 PartwordMaskValues PMV =
985 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
986 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
987
988 Value *ValOperand_Shifted = nullptr;
989 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
990 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
991 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
992 ValOperand_Shifted =
993 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
994 "ValOperand_Shifted");
995 }
996
997 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
998 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
999 AI->getValOperand(), PMV);
1000 };
1001
1002 Value *OldResult;
1003 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
1004 OldResult = insertRMWCmpXchgLoop(
1005 Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment,
1006 MemOpOrder, SSID, PerformPartwordOp, createCmpXchgInstFun, AI);
1007 } else {
1008 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
1009 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1010 PMV.AlignedAddrAlignment, MemOpOrder,
1011 PerformPartwordOp);
1012 }
1013
1014 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1015 AI->replaceAllUsesWith(FinalOldResult);
1016 AI->eraseFromParent();
1017}
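// Overall shape, illustratively, for
//   %old = atomicrmw add ptr %p, i8 %v monotonic
// on a target with a 32-bit minimum cmpxchg width: the loop built by
// insertRMWCmpXchgLoop (or insertRMWLLSCLoop) operates on the aligned i32
// word, PerformPartwordOp computes the new word via performMaskedAtomicOp,
// and the original i8 result is recovered with extractMaskedValue.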
1018
1019// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
1020AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
1021 ReplacementIRBuilder Builder(AI, *DL);
1022 AtomicRMWInst::BinOp Op = AI->getOperation();
1023
1024 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
1025 Op == AtomicRMWInst::And) &&
1026 "Unable to widen operation");
1027
1028 PartwordMaskValues PMV =
1029 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1030 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1031
1032 Value *ValOperand_Shifted =
1033 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
1034 PMV.ShiftAmt, "ValOperand_Shifted");
1035
1036 Value *NewOperand;
1037
1038 if (Op == AtomicRMWInst::And)
1039 NewOperand =
1040 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
1041 else
1042 NewOperand = ValOperand_Shifted;
1043
1044 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
1045 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1046 AI->getOrdering(), AI->getSyncScopeID());
1047
1048 copyMetadataForAtomic(*NewAI, *AI);
1049
1050 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
1051 AI->replaceAllUsesWith(FinalOldResult);
1052 AI->eraseFromParent();
1053 return NewAI;
1054}
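// Example (illustrative, using the masks computed by createMaskInstrs):
//   atomicrmw or ptr %p, i8 1
// widens to an i32 'or' on %AlignedAddr whose operand is the zero-extended
// value shifted left by ShiftAmt. For 'and', the shifted operand is first
// OR'ed with Inv_Mask so the bytes outside the original i8 are and'ed with
// all-ones and therefore left unchanged.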
1055
1056bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1057 // The basic idea here is that we're expanding a cmpxchg of a
1058 // smaller memory size up to a word-sized cmpxchg. To do this, we
1059 // need to add a retry-loop for strong cmpxchg, so that
1060 // modifications to other parts of the word don't cause a spurious
1061 // failure.
1062
1063 // This generates code like the following:
1064 // [[Setup mask values PMV.*]]
1065 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1066 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1067 // %InitLoaded = load i32* %addr
1068 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1069 // br partword.cmpxchg.loop
1070 // partword.cmpxchg.loop:
1071 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1072 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1073 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1074 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1075 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
1076 // i32 %FullWord_NewVal success_ordering failure_ordering
1077 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
1078 // %Success = extractvalue { i32, i1 } %NewCI, 1
1079 // br i1 %Success, label %partword.cmpxchg.end,
1080 // label %partword.cmpxchg.failure
1081 // partword.cmpxchg.failure:
1082 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1083 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1084 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1085 // label %partword.cmpxchg.end
1086 // partword.cmpxchg.end:
1087 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1088 // %FinalOldVal = trunc i32 %tmp1 to i8
1089 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
1090 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
1091
1092 Value *Addr = CI->getPointerOperand();
1093 Value *Cmp = CI->getCompareOperand();
1094 Value *NewVal = CI->getNewValOperand();
1095
1096 BasicBlock *BB = CI->getParent();
1097 Function *F = BB->getParent();
1098 ReplacementIRBuilder Builder(CI, *DL);
1099 LLVMContext &Ctx = Builder.getContext();
1100
1101 BasicBlock *EndBB =
1102 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1103 auto FailureBB =
1104 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1105 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1106
1107 // The split call above "helpfully" added a branch at the end of BB
1108 // (to the wrong place).
1109 std::prev(BB->end())->eraseFromParent();
1110 Builder.SetInsertPoint(BB);
1111
1112 PartwordMaskValues PMV =
1113 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1114 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1115
1116 // Shift the incoming values over, into the right location in the word.
1117 Value *NewVal_Shifted =
1118 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1119 Value *Cmp_Shifted =
1120 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1121
1122 // Load the entire current word, and mask into place the expected and new
1123 // values
1124 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1125 InitLoaded->setVolatile(CI->isVolatile());
1126 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1127 Builder.CreateBr(LoopBB);
1128
1129 // partword.cmpxchg.loop:
1130 Builder.SetInsertPoint(LoopBB);
1131 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1132 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1133
1134 // Mask/Or the expected and new values into place in the loaded word.
1135 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1136 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1137 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1138 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1139 CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
1140 NewCI->setVolatile(CI->isVolatile());
1141 // When we're building a strong cmpxchg, we need a loop, so you
1142 // might think we could use a weak cmpxchg inside. But, using strong
1143 // allows the below comparison for ShouldContinue, and we're
1144 // expecting the underlying cmpxchg to be a machine instruction,
1145 // which is strong anyways.
1146 NewCI->setWeak(CI->isWeak());
1147
1148 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1149 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1150
1151 if (CI->isWeak())
1152 Builder.CreateBr(EndBB);
1153 else
1154 Builder.CreateCondBr(Success, EndBB, FailureBB);
1155
1156 // partword.cmpxchg.failure:
1157 Builder.SetInsertPoint(FailureBB);
1158 // Upon failure, check whether the masked-out part of the loaded value
1159 // has been modified. If it hasn't, abort the cmpxchg, since the
1160 // masked-in part must have been.
1161 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1162 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1163 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1164
1165 // Add the second value to the phi from above
1166 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1167
1168 // partword.cmpxchg.end:
1169 Builder.SetInsertPoint(CI);
1170
1171 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1172 Value *Res = PoisonValue::get(CI->getType());
1173 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1174 Res = Builder.CreateInsertValue(Res, Success, 1);
1175
1176 CI->replaceAllUsesWith(Res);
1177 CI->eraseFromParent();
1178 return true;
1179}
1180
1181void AtomicExpandImpl::expandAtomicOpToLLSC(
1182 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1183 AtomicOrdering MemOpOrder,
1184 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1185 ReplacementIRBuilder Builder(I, *DL);
1186 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1187 MemOpOrder, PerformOp);
1188
1189 I->replaceAllUsesWith(Loaded);
1190 I->eraseFromParent();
1191}
1192
1193void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1194 ReplacementIRBuilder Builder(AI, *DL);
1195
1196 PartwordMaskValues PMV =
1197 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1198 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1199
1200 // The value operand must be sign-extended for signed min/max so that the
1201 // target's signed comparison instructions can be used. Otherwise, just
1202 // zero-ext.
1203 Instruction::CastOps CastOp = Instruction::ZExt;
1204 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1205 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1206 CastOp = Instruction::SExt;
1207
1208 Value *ValOperand_Shifted = Builder.CreateShl(
1209 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1210 PMV.ShiftAmt, "ValOperand_Shifted");
1211 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1212 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1213 AI->getOrdering());
1214 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1215 AI->replaceAllUsesWith(FinalOldResult);
1216 AI->eraseFromParent();
1217}
1218
1219void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1220 AtomicCmpXchgInst *CI) {
1221 ReplacementIRBuilder Builder(CI, *DL);
1222
1223 PartwordMaskValues PMV = createMaskInstrs(
1224 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1225 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1226
1227 Value *CmpVal_Shifted = Builder.CreateShl(
1228 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1229 "CmpVal_Shifted");
1230 Value *NewVal_Shifted = Builder.CreateShl(
1231 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1232 "NewVal_Shifted");
1233 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1234 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1235 CI->getMergedOrdering());
1236 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1237 Value *Res = PoisonValue::get(CI->getType());
1238 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1239 Value *Success = Builder.CreateICmpEQ(
1240 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1241 Res = Builder.CreateInsertValue(Res, Success, 1);
1242
1243 CI->replaceAllUsesWith(Res);
1244 CI->eraseFromParent();
1245}
1246
1247Value *AtomicExpandImpl::insertRMWLLSCLoop(
1248 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1249 AtomicOrdering MemOpOrder,
1250 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1251 LLVMContext &Ctx = Builder.getContext();
1252 BasicBlock *BB = Builder.GetInsertBlock();
1253 Function *F = BB->getParent();
1254
1255 assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
1256 "Expected at least natural alignment at this point.");
1257
1258 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1259 //
1260 // The standard expansion we produce is:
1261 // [...]
1262 // atomicrmw.start:
1263 // %loaded = @load.linked(%addr)
1264 // %new = some_op iN %loaded, %incr
1265 // %stored = @store_conditional(%new, %addr)
1266 // %try_again = icmp i32 ne %stored, 0
1267 // br i1 %try_again, label %loop, label %atomicrmw.end
1268 // atomicrmw.end:
1269 // [...]
1270 BasicBlock *ExitBB =
1271 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1272 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1273
1274 // The split call above "helpfully" added a branch at the end of BB (to the
1275 // wrong place).
1276 std::prev(BB->end())->eraseFromParent();
1277 Builder.SetInsertPoint(BB);
1278 Builder.CreateBr(LoopBB);
1279
1280 // Start the main loop block now that we've taken care of the preliminaries.
1281 Builder.SetInsertPoint(LoopBB);
1282 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1283
1284 Value *NewVal = PerformOp(Builder, Loaded);
1285
1286 Value *StoreSuccess =
1287 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1288 Value *TryAgain = Builder.CreateICmpNE(
1289 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1290
1291 Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1292
1293 // Atomic RMW expands to a Load-Linked / Store-Conditional loop. Because it
1294 // is hard to predict precise branch weights, we mark the branch as
1295 // "unknown" (50/50) to prevent misleading optimizations.
1297
1298 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1299 return Loaded;
1300}
1301
1302/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1303/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1304/// IR. As a migration step, we convert back to what used to be the standard
1305/// way to represent a pointer cmpxchg so that we can update backends one by
1306/// one.
1307AtomicCmpXchgInst *
1308AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1309 auto *M = CI->getModule();
1310 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1311 M->getDataLayout());
1312
1313 ReplacementIRBuilder Builder(CI, *DL);
1314
1315 Value *Addr = CI->getPointerOperand();
1316
1317 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1318 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1319
1320 auto *NewCI = Builder.CreateAtomicCmpXchg(
1321 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1322 CI->getFailureOrdering(), CI->getSyncScopeID());
1323 NewCI->setVolatile(CI->isVolatile());
1324 NewCI->setWeak(CI->isWeak());
1325 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1326
1327 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1328 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1329
1330 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1331
1332 Value *Res = PoisonValue::get(CI->getType());
1333 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1334 Res = Builder.CreateInsertValue(Res, Succ, 1);
1335
1336 CI->replaceAllUsesWith(Res);
1337 CI->eraseFromParent();
1338 return NewCI;
1339}
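// For example, assuming 64-bit pointers (illustrative value names):
//   %r = cmpxchg ptr %p, ptr %cmp, ptr %new seq_cst seq_cst
// becomes
//   %cmp.i = ptrtoint ptr %cmp to i64
//   %new.i = ptrtoint ptr %new to i64
//   %r.i   = cmpxchg ptr %p, i64 %cmp.i, i64 %new.i seq_cst seq_cst
//   %old.i = extractvalue { i64, i1 } %r.i, 0
//   %old   = inttoptr i64 %old.i to ptr
// with the original { ptr, i1 } result rebuilt from %old and the success bit.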
1340
1341bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1342 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1343 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1344 Value *Addr = CI->getPointerOperand();
1345 BasicBlock *BB = CI->getParent();
1346 Function *F = BB->getParent();
1347 LLVMContext &Ctx = F->getContext();
1348 // If shouldInsertFencesForAtomic() returns true, then the target does not
1349 // want to deal with memory orders, and emitLeading/TrailingFence should take
1350 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1351 // should preserve the ordering.
1352 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1353 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1354 ? AtomicOrdering::Monotonic
1355 : CI->getMergedOrdering();
1356
1357 // In implementations which use a barrier to achieve release semantics, we can
1358 // delay emitting this barrier until we know a store is actually going to be
1359 // attempted. The cost of this delay is that we need 2 copies of the block
1360 // emitting the load-linked, affecting code size.
1361 //
1362 // Ideally, this logic would be unconditional except for the minsize check
1363 // since in other cases the extra blocks naturally collapse down to the
1364 // minimal loop. Unfortunately, this puts too much stress on later
1365 // optimisations so we avoid emitting the extra logic in those cases too.
1366 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1367 SuccessOrder != AtomicOrdering::Monotonic &&
1368 SuccessOrder != AtomicOrdering::Acquire &&
1369 !F->hasMinSize();
1370
1371 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1372 // do it even on minsize.
1373 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1374
1375 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1376 //
1377 // The full expansion we produce is:
1378 // [...]
1379 // %aligned.addr = ...
1380 // cmpxchg.start:
1381 // %unreleasedload = @load.linked(%aligned.addr)
1382 // %unreleasedload.extract = extract value from %unreleasedload
1383 // %should_store = icmp eq %unreleasedload.extract, %desired
1384 // br i1 %should_store, label %cmpxchg.releasingstore,
1385 // label %cmpxchg.nostore
1386 // cmpxchg.releasingstore:
1387 // fence?
1388 // br label cmpxchg.trystore
1389 // cmpxchg.trystore:
1390 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1391 // [%releasedload, %cmpxchg.releasedload]
1392 // %updated.new = insert %new into %loaded.trystore
1393 // %stored = @store_conditional(%updated.new, %aligned.addr)
1394 // %success = icmp eq i32 %stored, 0
1395 // br i1 %success, label %cmpxchg.success,
1396 // label %cmpxchg.releasedload/%cmpxchg.failure
1397 // cmpxchg.releasedload:
1398 // %releasedload = @load.linked(%aligned.addr)
1399 // %releasedload.extract = extract value from %releasedload
1400 // %should_store = icmp eq %releasedload.extract, %desired
1401 // br i1 %should_store, label %cmpxchg.trystore,
1402 // label %cmpxchg.failure
1403 // cmpxchg.success:
1404 // fence?
1405 // br label %cmpxchg.end
1406 // cmpxchg.nostore:
1407 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1408 // [%releasedload,
1409 // %cmpxchg.releasedload/%cmpxchg.trystore]
1410 // @load_linked_fail_balance()?
1411 // br label %cmpxchg.failure
1412 // cmpxchg.failure:
1413 // fence?
1414 // br label %cmpxchg.end
1415 // cmpxchg.end:
1416 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1417 // [%loaded.trystore, %cmpxchg.trystore]
1418 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1419 // %loaded = extract value from %loaded.exit
1420 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1421 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1422 // [...]
1423 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1424 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1425 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1426 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1427 auto ReleasedLoadBB =
1428 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1429 auto TryStoreBB =
1430 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1431 auto ReleasingStoreBB =
1432 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1433 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1434
1435 ReplacementIRBuilder Builder(CI, *DL);
1436
1437 // The split call above "helpfully" added a branch at the end of BB (to the
1438 // wrong place), but we might want a fence too. It's easiest to just remove
1439 // the branch entirely.
1440 std::prev(BB->end())->eraseFromParent();
1441 Builder.SetInsertPoint(BB);
1442 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1443 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1444
1445 PartwordMaskValues PMV =
1446 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1447 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1448 Builder.CreateBr(StartBB);
1449
1450 // Start the main loop block now that we've taken care of the preliminaries.
1451 Builder.SetInsertPoint(StartBB);
1452 Value *UnreleasedLoad =
1453 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1454 Value *UnreleasedLoadExtract =
1455 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1456 Value *ShouldStore = Builder.CreateICmpEQ(
1457 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1458
1459 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1460 // jump straight past that fence instruction (if it exists).
1461 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
1462 MDBuilder(F->getContext()).createLikelyBranchWeights());
1463
1464 Builder.SetInsertPoint(ReleasingStoreBB);
1465 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1466 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1467 Builder.CreateBr(TryStoreBB);
1468
1469 Builder.SetInsertPoint(TryStoreBB);
1470 PHINode *LoadedTryStore =
1471 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1472 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1473 Value *NewValueInsert =
1474 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1475 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1476 PMV.AlignedAddr, MemOpOrder);
1477 StoreSuccess = Builder.CreateICmpEQ(
1478 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1479 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1480 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1481 CI->isWeak() ? FailureBB : RetryBB,
1482 MDBuilder(F->getContext()).createLikelyBranchWeights());
1483
1484 Builder.SetInsertPoint(ReleasedLoadBB);
1485 Value *SecondLoad;
1486 if (HasReleasedLoadBB) {
1487 SecondLoad =
1488 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1489 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1490 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1491 CI->getCompareOperand(), "should_store");
1492
1493 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1494 // jump straight past that fence instruction (if it exists).
1495 Builder.CreateCondBr(
1496 ShouldStore, TryStoreBB, NoStoreBB,
1497 MDBuilder(F->getContext()).createLikelyBranchWeights());
1498 // Update PHI node in TryStoreBB.
1499 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1500 } else
1501 Builder.CreateUnreachable();
1502
1503 // Make sure later instructions don't get reordered with a fence if
1504 // necessary.
1505 Builder.SetInsertPoint(SuccessBB);
1506 if (ShouldInsertFencesForAtomic ||
1507 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1508 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1509 Builder.CreateBr(ExitBB);
1510
1511 Builder.SetInsertPoint(NoStoreBB);
1512 PHINode *LoadedNoStore =
1513 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1514 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1515 if (HasReleasedLoadBB)
1516 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1517
1518 // In the failing case, where we don't execute the store-conditional, the
1519 // target might want to balance out the load-linked with a dedicated
1520 // instruction (e.g., on ARM, clearing the exclusive monitor).
1521 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1522 Builder.CreateBr(FailureBB);
1523
1524 Builder.SetInsertPoint(FailureBB);
1525 PHINode *LoadedFailure =
1526 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1527 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1528 if (CI->isWeak())
1529 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1530 if (ShouldInsertFencesForAtomic)
1531 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1532 Builder.CreateBr(ExitBB);
1533
1534 // Finally, we have control-flow based knowledge of whether the cmpxchg
1535 // succeeded or not. We expose this to later passes by converting any
1536 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1537 // PHI.
1538 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1539 PHINode *LoadedExit =
1540 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1541 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1542 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1543 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1544 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1545 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1546
1547 // This is the "exit value" from the cmpxchg expansion. It may be of
1548 // a type wider than the one in the cmpxchg instruction.
1549 Value *LoadedFull = LoadedExit;
1550
1551 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1552 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1553
1554 // Look for any users of the cmpxchg that are just comparing the loaded value
1555 // against the desired one, and replace them with the CFG-derived version.
1556 SmallVector<ExtractValueInst *, 2> PrunedInsts;
1557 for (auto *User : CI->users()) {
1558 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1559 if (!EV)
1560 continue;
1561
1562 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1563 "weird extraction from { iN, i1 }");
1564
1565 if (EV->getIndices()[0] == 0)
1566 EV->replaceAllUsesWith(Loaded);
1567 else
1568 EV->replaceAllUsesWith(Success);
1569
1570 PrunedInsts.push_back(EV);
1571 }
1572
1573 // We can remove the instructions now we're no longer iterating through them.
1574 for (auto *EV : PrunedInsts)
1575 EV->eraseFromParent();
1576
1577 if (!CI->use_empty()) {
1578 // Some use of the full struct return that we don't understand has happened,
1579 // so we've got to reconstruct it properly.
1580 Value *Res;
1581 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1582 Res = Builder.CreateInsertValue(Res, Success, 1);
1583
1584 CI->replaceAllUsesWith(Res);
1585 }
1586
1587 CI->eraseFromParent();
1588 return true;
1589}
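For orientation, the extractvalue rewiring performed just above targets input IR of roughly the following shape (a sketch of typical input, shown as comments); after the expansion, uses of %value and %ok are redirected to the "loaded.exit" and "success" PHI nodes and the original cmpxchg is erased:

  //   %pair  = cmpxchg ptr %addr, i32 %old, i32 %new acq_rel acquire
  //   %value = extractvalue { i32, i1 } %pair, 0   ; -> %loaded.exit
  //   %ok    = extractvalue { i32, i1 } %pair, 1   ; -> %success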
1590
1591bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1592 // TODO: Add floating point support.
1593 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1594 if (!C)
1595 return false;
1596
1597 switch (RMWI->getOperation()) {
1598 case AtomicRMWInst::Add:
1599 case AtomicRMWInst::Sub:
1600 case AtomicRMWInst::Or:
1601 case AtomicRMWInst::Xor:
1602 return C->isZero();
1603 case AtomicRMWInst::And:
1604 return C->isMinusOne();
1605 case AtomicRMWInst::Min:
1606 return C->isMaxValue(true);
1607 case AtomicRMWInst::Max:
1608 return C->isMinValue(true);
1609 case AtomicRMWInst::UMin:
1610 return C->isMaxValue(false);
1611 case AtomicRMWInst::UMax:
1612 return C->isMinValue(false);
1613 default:
1614 return false;
1615 }
1616}
1617
1618bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1619 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1620 tryExpandAtomicLoad(ResultingLoad);
1621 return true;
1622 }
1623 return false;
1624}
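To make the idempotent case concrete: an operation such as "atomicrmw or ptr %p, i32 0 seq_cst" returns the stored value without modifying it, so a target's lowerIdempotentRMWIntoFencedLoad hook may rewrite it as an ordered load. The following is only a minimal sketch of that rewrite as a free function (rewriteIdempotentRMWToLoad is a hypothetical name, and real targets additionally emit whatever leading/trailing fences the original ordering requires):

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static LoadInst *rewriteIdempotentRMWToLoad(AtomicRMWInst *RMWI) {
    IRBuilder<> Builder(RMWI);
    // Replace the read-modify-write with a load of the same location; the
    // stored value is unchanged by construction.
    LoadInst *Loaded = Builder.CreateAlignedLoad(
        RMWI->getType(), RMWI->getPointerOperand(), RMWI->getAlign(), "loaded");
    Loaded->setAtomic(AtomicOrdering::Monotonic, RMWI->getSyncScopeID());
    RMWI->replaceAllUsesWith(Loaded);
    RMWI->eraseFromParent();
    return Loaded; // simplifyIdempotentRMW then runs tryExpandAtomicLoad on it.
  }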
1625
1626Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1627 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1628 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1629 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1630 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1631 LLVMContext &Ctx = Builder.getContext();
1632 BasicBlock *BB = Builder.GetInsertBlock();
1633 Function *F = BB->getParent();
1634
1635 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1636 //
1637 // The standard expansion we produce is:
1638 // [...]
1639 // %init_loaded = load atomic iN* %addr
1640 // br label %loop
1641 // loop:
1642 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1643 // %new = some_op iN %loaded, %incr
1644 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1645 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1646 // %success = extractvalue { iN, i1 } %pair, 1
1647 // br i1 %success, label %atomicrmw.end, label %loop
1648 // atomicrmw.end:
1649 // [...]
1650 BasicBlock *ExitBB =
1651 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1652 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1653
1654 // The split call above "helpfully" added a branch at the end of BB (to the
1655 // wrong place), but we want a load. It's easiest to just remove
1656 // the branch entirely.
1657 std::prev(BB->end())->eraseFromParent();
1658 Builder.SetInsertPoint(BB);
1659 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1660 Builder.CreateBr(LoopBB);
1661
1662 // Start the main loop block now that we've taken care of the preliminaries.
1663 Builder.SetInsertPoint(LoopBB);
1664 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1665 Loaded->addIncoming(InitLoaded, BB);
1666
1667 Value *NewVal = PerformOp(Builder, Loaded);
1668
1669 Value *NewLoaded = nullptr;
1670 Value *Success = nullptr;
1671
1672 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1673 MemOpOrder == AtomicOrdering::Unordered
1674 ? AtomicOrdering::Monotonic
1675 : MemOpOrder,
1676 SSID, Success, NewLoaded, MetadataSrc);
1677 assert(Success && NewLoaded);
1678
1679 Loaded->addIncoming(NewLoaded, LoopBB);
1680
1681 Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);
1682
1683 // Atomic RMW expands to a cmpxchg loop. Since precise branch weights
1684 // cannot be easily determined here, we mark the branch as "unknown" (50/50)
1685 // to prevent misleading optimizations.
1686 setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, DEBUG_TYPE);
1687
1688 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1689 return NewLoaded;
1690}
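The loop built above has a direct C-level analogue. A sketch for a 32-bit "atomicrmw add" using the Clang/GCC __atomic builtins (the pass emits IR rather than these calls; the point is the data flow, in particular how a failed compare-exchange refreshes the loaded value, which is what the %loaded PHI's back-edge does):

  static int atomic_fetch_add_via_cas(int *Addr, int Incr) {
    int Expected = __atomic_load_n(Addr, __ATOMIC_RELAXED); // %init_loaded
    int Desired;
    do {
      Desired = Expected + Incr;                            // PerformOp
      // On failure, __atomic_compare_exchange_n writes the value currently in
      // memory back into Expected, so the next iteration recomputes from it.
    } while (!__atomic_compare_exchange_n(Addr, &Expected, Desired,
                                          /*weak=*/false, __ATOMIC_SEQ_CST,
                                          __ATOMIC_SEQ_CST));
    return Expected; // the old value, matching the result of atomicrmw
  }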
1691
1692bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1693 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1694 unsigned ValueSize = getAtomicOpSize(CI);
1695
1696 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1697 default:
1698 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1699 case TargetLoweringBase::AtomicExpansionKind::None:
1700 if (ValueSize < MinCASSize)
1701 return expandPartwordCmpXchg(CI);
1702 return false;
1703 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1704 return expandAtomicCmpXchg(CI);
1705 }
1706 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1707 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1708 return true;
1709 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1710 return lowerAtomicCmpXchgInst(CI);
1711 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1712 TLI->emitExpandAtomicCmpXchg(CI);
1713 return true;
1714 }
1715 }
1716}
1717
1718 // Note: This function is exposed externally by AtomicExpandUtils.h
1719 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1720 CreateCmpXchgInstFun CreateCmpXchg) {
1721 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1722 Builder.setIsFPConstrained(
1723 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1724
1725 // FIXME: If FP exceptions are observable, we should force them off for the
1726 // loop for the FP atomics.
1727 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1728 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1729 AI->getOrdering(), AI->getSyncScopeID(),
1730 [&](IRBuilderBase &Builder, Value *Loaded) {
1731 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1732 AI->getValOperand());
1733 },
1734 CreateCmpXchg, /*MetadataSrc=*/AI);
1735
1736 AI->replaceAllUsesWith(Loaded);
1737 AI->eraseFromParent();
1738 return true;
1739}
1740
1741// In order to use one of the sized library calls such as
1742// __atomic_fetch_add_4, the alignment must be sufficient, the size
1743// must be one of the potentially-specialized sizes, and the value
1744// type must actually exist in C on the target (otherwise, the
1745// function wouldn't actually be defined.)
1746static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1747 const DataLayout &DL) {
1748 // TODO: "LargestSize" is an approximation for "largest type that
1749 // you can express in C". It seems to be the case that int128 is
1750 // supported on all 64-bit platforms, otherwise only up to 64-bit
1751 // integers are supported. If we get this wrong, then we'll try to
1752 // call a sized libcall that doesn't actually exist. There should
1753 // really be some more reliable way in LLVM of determining integer
1754 // sizes which are valid in the target's C ABI...
1755 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1756 return Alignment >= Size &&
1757 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1758 Size <= LargestSize;
1759}
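Worked examples of the predicate above, assuming DL is a typical 64-bit DataLayout (so getLargestLegalIntTypeSizeInBits() >= 64 and LargestSize is 16); these are illustrative assertions only, with Align being llvm::Align:

  assert(canUseSizedAtomicCall(4, Align(4), DL));    // __atomic_*_4 is usable
  assert(!canUseSizedAtomicCall(8, Align(4), DL));   // under-aligned for 8 bytes
  assert(canUseSizedAtomicCall(16, Align(16), DL));  // i128 assumed to exist in C
  assert(!canUseSizedAtomicCall(3, Align(4), DL));   // 3 is not a specialized size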
1760
1761void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1762 static const RTLIB::Libcall Libcalls[6] = {
1763 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1764 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1765 unsigned Size = getAtomicOpSize(I);
1766
1767 bool expanded = expandAtomicOpToLibcall(
1768 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1769 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1770 if (!expanded)
1771 handleFailure(*I, "unsupported atomic load");
1772}
1773
1774void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1775 static const RTLIB::Libcall Libcalls[6] = {
1776 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1777 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1778 unsigned Size = getAtomicOpSize(I);
1779
1780 bool expanded = expandAtomicOpToLibcall(
1781 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1782 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1783 if (!expanded)
1784 handleFailure(*I, "unsupported atomic store");
1785}
1786
1787void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1788 static const RTLIB::Libcall Libcalls[6] = {
1789 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1790 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1791 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1792 unsigned Size = getAtomicOpSize(I);
1793
1794 bool expanded = expandAtomicOpToLibcall(
1795 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1796 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1797 Libcalls);
1798 if (!expanded)
1799 handleFailure(*I, "unsupported cmpxchg");
1800}
1801
1802 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1803 static const RTLIB::Libcall LibcallsXchg[6] = {
1804 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1805 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1806 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1807 static const RTLIB::Libcall LibcallsAdd[6] = {
1808 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1809 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1810 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1811 static const RTLIB::Libcall LibcallsSub[6] = {
1812 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1813 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1814 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1815 static const RTLIB::Libcall LibcallsAnd[6] = {
1816 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1817 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1818 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1819 static const RTLIB::Libcall LibcallsOr[6] = {
1820 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1821 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1822 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1823 static const RTLIB::Libcall LibcallsXor[6] = {
1824 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1825 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1826 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1827 static const RTLIB::Libcall LibcallsNand[6] = {
1828 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1829 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1830 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1831
1832 switch (Op) {
1833 case AtomicRMWInst::BAD_BINOP:
1834 llvm_unreachable("Should not have BAD_BINOP.");
1835 case AtomicRMWInst::Xchg:
1836 return ArrayRef(LibcallsXchg);
1837 case AtomicRMWInst::Add:
1838 return ArrayRef(LibcallsAdd);
1839 case AtomicRMWInst::Sub:
1840 return ArrayRef(LibcallsSub);
1841 case AtomicRMWInst::And:
1842 return ArrayRef(LibcallsAnd);
1843 case AtomicRMWInst::Or:
1844 return ArrayRef(LibcallsOr);
1845 case AtomicRMWInst::Xor:
1846 return ArrayRef(LibcallsXor);
1847 case AtomicRMWInst::Nand:
1848 return ArrayRef(LibcallsNand);
1849 case AtomicRMWInst::Max:
1850 case AtomicRMWInst::Min:
1851 case AtomicRMWInst::UMax:
1852 case AtomicRMWInst::UMin:
1853 case AtomicRMWInst::FAdd:
1854 case AtomicRMWInst::FSub:
1855 case AtomicRMWInst::FMax:
1856 case AtomicRMWInst::FMin:
1857 case AtomicRMWInst::FMaximum:
1858 case AtomicRMWInst::FMinimum:
1859 case AtomicRMWInst::UIncWrap:
1860 case AtomicRMWInst::UDecWrap:
1861 case AtomicRMWInst::USubCond:
1862 case AtomicRMWInst::USubSat:
1863 // No atomic libcalls are available for these.
1864 return {};
1865 }
1866 llvm_unreachable("Unexpected AtomicRMW operation.");
1867}
1868
1869void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1870 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1871
1872 unsigned Size = getAtomicOpSize(I);
1873
1874 bool Success = false;
1875 if (!Libcalls.empty())
1876 Success = expandAtomicOpToLibcall(
1877 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1878 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1879
1880 // The expansion failed: either there were no libcalls at all for
1881 // the operation (min/max), or there were only size-specialized
1882 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1883 // CAS libcall, via a CAS loop, instead.
1884 if (!Success) {
1885 expandAtomicRMWToCmpXchg(
1886 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1887 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1888 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded,
1889 Instruction *MetadataSrc) {
1890 // Create the CAS instruction normally...
1891 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1892 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1893 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1894 if (MetadataSrc)
1895 copyMetadataForAtomic(*Pair, *MetadataSrc);
1896
1897 Success = Builder.CreateExtractValue(Pair, 1, "success");
1898 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1899
1900 // ...and then expand the CAS into a libcall.
1901 expandAtomicCASToLibcall(Pair);
1902 });
1903 }
1904}
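For an operation with no fetch_* libcall, such as "atomicrmw max", the fallback above therefore ends in compare-exchange libcalls: the cmpxchg created by the lambda is itself immediately rewritten by expandAtomicCASToLibcall. A sketch of the resulting IR for a 4-byte max (not verbatim output; 5 is the C ABI value toCABI produces for seq_cst, and the expected value travels through an alloca as described in expandAtomicOpToLibcall below):

  //   %cmp = icmp sgt i32 %loaded, %v
  //   %new = select i1 %cmp, i32 %loaded, i32 %v
  //   store i32 %loaded, ptr %expected.alloca
  //   %ok  = call zeroext i1 @__atomic_compare_exchange_4(
  //              ptr %p, ptr %expected.alloca, i32 %new, i32 5, i32 5)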
1905
1906// A helper routine for the above expandAtomic*ToLibcall functions.
1907//
1908// 'Libcalls' contains an array of enum values for the particular
1909// ATOMIC libcalls to be emitted. All of the other arguments besides
1910// 'I' are extracted from the Instruction subclass by the
1911// caller. Depending on the particular call, some will be null.
1912bool AtomicExpandImpl::expandAtomicOpToLibcall(
1913 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1914 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1915 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1916 assert(Libcalls.size() == 6);
1917
1918 LLVMContext &Ctx = I->getContext();
1919 Module *M = I->getModule();
1920 const DataLayout &DL = M->getDataLayout();
1921 IRBuilder<> Builder(I);
1922 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1923
1924 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1925 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1926
1927 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1928
1929 // TODO: the "order" argument type is "int", not int32. So
1930 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1931 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1932 Constant *OrderingVal =
1933 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1934 Constant *Ordering2Val = nullptr;
1935 if (CASExpected) {
1936 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1937 Ordering2Val =
1938 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1939 }
1940 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1941
1942 RTLIB::Libcall RTLibType;
1943 if (UseSizedLibcall) {
1944 switch (Size) {
1945 case 1:
1946 RTLibType = Libcalls[1];
1947 break;
1948 case 2:
1949 RTLibType = Libcalls[2];
1950 break;
1951 case 4:
1952 RTLibType = Libcalls[3];
1953 break;
1954 case 8:
1955 RTLibType = Libcalls[4];
1956 break;
1957 case 16:
1958 RTLibType = Libcalls[5];
1959 break;
1960 }
1961 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1962 RTLibType = Libcalls[0];
1963 } else {
1964 // Can't use sized function, and there's no generic for this
1965 // operation, so give up.
1966 return false;
1967 }
1968
1969 if (!TLI->getLibcallName(RTLibType)) {
1970 // This target does not implement the requested atomic libcall so give up.
1971 return false;
1972 }
1973
1974 // Build up the function call. There's two kinds. First, the sized
1975 // variants. These calls are going to be one of the following (with
1976 // N=1,2,4,8,16):
1977 // iN __atomic_load_N(iN *ptr, int ordering)
1978 // void __atomic_store_N(iN *ptr, iN val, int ordering)
1979 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1980 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1981 // int success_order, int failure_order)
1982 //
1983 // Note that these functions can be used for non-integer atomic
1984 // operations, the values just need to be bitcast to integers on the
1985 // way in and out.
1986 //
1987 // And, then, the generic variants. They look like the following:
1988 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1989 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1990 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1991 // int ordering)
1992 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1993 // void *desired, int success_order,
1994 // int failure_order)
1995 //
1996 // The different signatures are built up depending on the
1997 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1998 // variables.
1999
2000 AllocaInst *AllocaCASExpected = nullptr;
2001 AllocaInst *AllocaValue = nullptr;
2002 AllocaInst *AllocaResult = nullptr;
2003
2004 Type *ResultTy;
2005 SmallVector<Value *, 6> Args;
2006 AttributeList Attr;
2007
2008 // 'size' argument.
2009 if (!UseSizedLibcall) {
2010 // Note, getIntPtrType is assumed equivalent to size_t.
2011 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
2012 }
2013
2014 // 'ptr' argument.
2015 // note: This assumes all address spaces share a common libfunc
2016 // implementation and that addresses are convertible. For systems without
2017 // that property, we'd need to extend this mechanism to support AS-specific
2018 // families of atomic intrinsics.
2019 Value *PtrVal = PointerOperand;
2020 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
2021 Args.push_back(PtrVal);
2022
2023 // 'expected' argument, if present.
2024 if (CASExpected) {
2025 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
2026 AllocaCASExpected->setAlignment(AllocaAlignment);
2027 Builder.CreateLifetimeStart(AllocaCASExpected);
2028 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
2029 Args.push_back(AllocaCASExpected);
2030 }
2031
2032 // 'val' argument ('desired' for cas), if present.
2033 if (ValueOperand) {
2034 if (UseSizedLibcall) {
2035 Value *IntValue =
2036 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
2037 Args.push_back(IntValue);
2038 } else {
2039 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
2040 AllocaValue->setAlignment(AllocaAlignment);
2041 Builder.CreateLifetimeStart(AllocaValue);
2042 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
2043 Args.push_back(AllocaValue);
2044 }
2045 }
2046
2047 // 'ret' argument.
2048 if (!CASExpected && HasResult && !UseSizedLibcall) {
2049 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
2050 AllocaResult->setAlignment(AllocaAlignment);
2051 Builder.CreateLifetimeStart(AllocaResult);
2052 Args.push_back(AllocaResult);
2053 }
2054
2055 // 'ordering' ('success_order' for cas) argument.
2056 Args.push_back(OrderingVal);
2057
2058 // 'failure_order' argument, if present.
2059 if (Ordering2Val)
2060 Args.push_back(Ordering2Val);
2061
2062 // Now, the return type.
2063 if (CASExpected) {
2064 ResultTy = Type::getInt1Ty(Ctx);
2065 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2066 } else if (HasResult && UseSizedLibcall)
2067 ResultTy = SizedIntTy;
2068 else
2069 ResultTy = Type::getVoidTy(Ctx);
2070
2071 // Done with setting up arguments and return types, create the call:
2072 SmallVector<Type *, 6> ArgTys;
2073 for (Value *Arg : Args)
2074 ArgTys.push_back(Arg->getType());
2075 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2076 FunctionCallee LibcallFn =
2077 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
2078 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2079 Call->setAttributes(Attr);
2080 Value *Result = Call;
2081
2082 // And then, extract the results...
2083 if (ValueOperand && !UseSizedLibcall)
2084 Builder.CreateLifetimeEnd(AllocaValue);
2085
2086 if (CASExpected) {
2087 // The final result from the CAS is {load of 'expected' alloca, bool result
2088 // from call}
2089 Type *FinalResultTy = I->getType();
2090 Value *V = PoisonValue::get(FinalResultTy);
2091 Value *ExpectedOut = Builder.CreateAlignedLoad(
2092 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2093 Builder.CreateLifetimeEnd(AllocaCASExpected);
2094 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2095 V = Builder.CreateInsertValue(V, Result, 1);
2096 I->replaceAllUsesWith(V);
2097 } else if (HasResult) {
2098 Value *V;
2099 if (UseSizedLibcall)
2100 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2101 else {
2102 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2103 AllocaAlignment);
2104 Builder.CreateLifetimeEnd(AllocaResult);
2105 }
2106 I->replaceAllUsesWith(V);
2107 }
2108 I->eraseFromParent();
2109 return true;
2110}
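As a concrete instance of the two call shapes described in the comment above (a sketch of the emitted IR, not verbatim output; 5 is the C ABI encoding toCABI produces for seq_cst):

  //   ; sized path: 4-byte seq_cst exchange
  //   %old = call i32 @__atomic_exchange_4(ptr %p, i32 %v, i32 5)
  //
  //   ; generic path: 16-byte exchange when the sized form is unusable, with
  //   ; the value and result passed through allocas as built above
  //   call void @__atomic_exchange(i64 16, ptr %p, ptr %val.alloca,
  //                                ptr %ret.alloca, i32 5)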