LLVM 23.0.0git
AtomicExpandPass.cpp
Go to the documentation of this file.
1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target specific instruction which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/Instruction.h"
38#include "llvm/IR/MDBuilder.h"
40#include "llvm/IR/Module.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
46#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const LibcallLoweringInfo *LibcallLowering = nullptr;
67 const DataLayout *DL = nullptr;
68
69private:
70 /// Callback type for emitting a cmpxchg instruction during RMW expansion.
71 /// Parameters: (Builder, Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
72 /// SSID, IsVolatile, /* OUT */ Success, /* OUT */ NewLoaded,
73 /// MetadataSrc)
74 using CreateCmpXchgInstFun = function_ref<void(
76 SyncScope::ID, bool, Value *&, Value *&, Instruction *)>;
77
78 void handleFailure(Instruction &FailedInst, const Twine &Msg,
79 Instruction *DiagnosticInst = nullptr) const {
80 LLVMContext &Ctx = FailedInst.getContext();
81
82 // TODO: Do not use generic error type.
83 Ctx.emitError(DiagnosticInst ? DiagnosticInst : &FailedInst, Msg);
84
85 if (!FailedInst.getType()->isVoidTy())
86 FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
87 FailedInst.eraseFromParent();
88 }
89
90 template <typename Inst>
91 void handleUnsupportedAtomicSize(Inst *I, const Twine &AtomicOpName,
92 Instruction *DiagnosticInst = nullptr) const;
93
94 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
95 bool tryInsertTrailingSeqCstFence(Instruction *AtomicI);
96 template <typename AtomicInst>
97 bool tryInsertFencesForAtomic(AtomicInst *AtomicI, bool OrderingRequiresFence,
98 AtomicOrdering NewOrdering);
99 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
100 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
101 bool tryExpandAtomicLoad(LoadInst *LI);
102 bool expandAtomicLoadToLL(LoadInst *LI);
103 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
104 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
105 bool tryExpandAtomicStore(StoreInst *SI);
106 void expandAtomicStoreToXChg(StoreInst *SI);
107 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
108 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
109 Value *
110 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
111 Align AddrAlign, AtomicOrdering MemOpOrder,
112 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
113 void expandAtomicOpToLLSC(
114 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
115 AtomicOrdering MemOpOrder,
116 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
117 void expandPartwordAtomicRMW(
119 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
120 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
121 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
122 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
123
124 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
125 Value *insertRMWCmpXchgLoop(
126 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
127 AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile,
128 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
129 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
130 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
131
132 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
133 bool isIdempotentRMW(AtomicRMWInst *RMWI);
134 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
135
136 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
137 Value *PointerOperand, Value *ValueOperand,
138 Value *CASExpected, AtomicOrdering Ordering,
139 AtomicOrdering Ordering2,
140 ArrayRef<RTLIB::Libcall> Libcalls);
141 void expandAtomicLoadToLibcall(LoadInst *LI);
142 void expandAtomicStoreToLibcall(StoreInst *LI);
143 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
144 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
145 const Twine &AtomicOpName = "cmpxchg",
146 Instruction *DiagnosticInst = nullptr);
147
148 bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
149 CreateCmpXchgInstFun CreateCmpXchg);
150
151 bool processAtomicInstr(Instruction *I);
152
153public:
154 bool run(Function &F,
155 const LibcallLoweringModuleAnalysisResult &LibcallResult,
156 const TargetMachine *TM);
157};
158
159class AtomicExpandLegacy : public FunctionPass {
160public:
161 static char ID; // Pass identification, replacement for typeid
162
163 AtomicExpandLegacy() : FunctionPass(ID) {}
164
165 void getAnalysisUsage(AnalysisUsage &AU) const override {
168 }
169
170 bool runOnFunction(Function &F) override;
171};
172
173// IRBuilder to be used for replacement atomic instructions.
174struct ReplacementIRBuilder
175 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
176 MDNode *MMRAMD = nullptr;
177
178 // Preserves the DebugLoc from I, and preserves still valid metadata.
179 // Enable StrictFP builder mode when appropriate.
180 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
181 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
183 [this](Instruction *I) { addMMRAMD(I); })) {
184 SetInsertPoint(I);
185 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
186 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
187 this->setIsFPConstrained(true);
188
189 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
190 }
191
192 void addMMRAMD(Instruction *I) {
194 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
195 }
196};
197
198} // end anonymous namespace
199
200char AtomicExpandLegacy::ID = 0;
201
202char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
203
205 "Expand Atomic instructions", false, false)
208INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
209 "Expand Atomic instructions", false, false)
210
211// Helper functions to retrieve the size of atomic instructions.
212static unsigned getAtomicOpSize(LoadInst *LI) {
213 const DataLayout &DL = LI->getDataLayout();
214 return DL.getTypeStoreSize(LI->getType());
215}
216
217static unsigned getAtomicOpSize(StoreInst *SI) {
218 const DataLayout &DL = SI->getDataLayout();
219 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
220}
221
222static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
223 const DataLayout &DL = RMWI->getDataLayout();
224 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
225}
226
227static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
228 const DataLayout &DL = CASI->getDataLayout();
229 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
230}
231
232/// Copy metadata that's safe to preserve when widening atomics.
234 const Instruction &Source) {
236 Source.getAllMetadata(MD);
237 LLVMContext &Ctx = Dest.getContext();
238 MDBuilder MDB(Ctx);
239
240 for (auto [ID, N] : MD) {
241 switch (ID) {
242 case LLVMContext::MD_dbg:
243 case LLVMContext::MD_tbaa:
244 case LLVMContext::MD_tbaa_struct:
245 case LLVMContext::MD_alias_scope:
246 case LLVMContext::MD_noalias:
247 case LLVMContext::MD_noalias_addrspace:
248 case LLVMContext::MD_access_group:
249 case LLVMContext::MD_mmra:
250 Dest.setMetadata(ID, N);
251 break;
252 default:
253 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
254 Dest.setMetadata(ID, N);
255 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
256 Dest.setMetadata(ID, N);
257
258 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
259 // uses.
260 break;
261 }
262 }
263}
264
265template <typename Inst>
266static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
267 unsigned Size = getAtomicOpSize(I);
268 Align Alignment = I->getAlign();
269 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
270 return Alignment >= Size && Size <= MaxSize;
271}
272
273template <typename Inst>
275 raw_ostream &OS) {
276 unsigned Size = getAtomicOpSize(I);
277 Align Alignment = I->getAlign();
278 bool NeedSeparator = false;
279
280 if (Alignment < Size) {
281 OS << "instruction alignment " << Alignment.value()
282 << " is smaller than the required " << Size
283 << "-byte alignment for this atomic operation";
284 NeedSeparator = true;
285 }
286
287 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
288 if (Size > MaxSize) {
289 if (NeedSeparator)
290 OS << "; ";
291 OS << "target supports atomics up to " << MaxSize
292 << " bytes, but this atomic accesses " << Size << " bytes";
293 }
294}
295
296template <typename Inst>
297void AtomicExpandImpl::handleUnsupportedAtomicSize(
298 Inst *I, const Twine &AtomicOpName, Instruction *DiagnosticInst) const {
299 assert(!atomicSizeSupported(TLI, I) && "expected unsupported atomic size");
300 SmallString<128> FailureReason;
301 raw_svector_ostream OS(FailureReason);
303 handleFailure(*I, Twine("unsupported ") + AtomicOpName + ": " + FailureReason,
304 DiagnosticInst);
305}
306
307bool AtomicExpandImpl::tryInsertTrailingSeqCstFence(Instruction *AtomicI) {
309 return false;
310
311 IRBuilder Builder(AtomicI);
312 if (auto *TrailingFence = TLI->emitTrailingFence(
313 Builder, AtomicI, AtomicOrdering::SequentiallyConsistent)) {
314 TrailingFence->moveAfter(AtomicI);
315 return true;
316 }
317 return false;
318}
319
320template <typename AtomicInst>
321bool AtomicExpandImpl::tryInsertFencesForAtomic(AtomicInst *AtomicI,
322 bool OrderingRequiresFence,
323 AtomicOrdering NewOrdering) {
324 bool ShouldInsertFences = TLI->shouldInsertFencesForAtomic(AtomicI);
325 if (OrderingRequiresFence && ShouldInsertFences) {
326 AtomicOrdering FenceOrdering = AtomicI->getOrdering();
327 AtomicI->setOrdering(NewOrdering);
328 return bracketInstWithFences(AtomicI, FenceOrdering);
329 }
330 if (!ShouldInsertFences)
331 return tryInsertTrailingSeqCstFence(AtomicI);
332 return false;
333}
334
335bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
336 if (auto *LI = dyn_cast<LoadInst>(I)) {
337 if (!LI->isAtomic())
338 return false;
339
340 if (!atomicSizeSupported(TLI, LI)) {
341 expandAtomicLoadToLibcall(LI);
342 return true;
343 }
344
345 bool MadeChange = false;
346 if (TLI->shouldCastAtomicLoadInIR(LI) ==
347 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
348 LI = convertAtomicLoadToIntegerType(LI);
349 MadeChange = true;
350 }
351
352 MadeChange |= tryInsertFencesForAtomic(
353 LI, isAcquireOrStronger(LI->getOrdering()), AtomicOrdering::Monotonic);
354
355 MadeChange |= tryExpandAtomicLoad(LI);
356 return MadeChange;
357 }
358
359 if (auto *SI = dyn_cast<StoreInst>(I)) {
360 if (!SI->isAtomic())
361 return false;
362
363 if (!atomicSizeSupported(TLI, SI)) {
364 expandAtomicStoreToLibcall(SI);
365 return true;
366 }
367
368 bool MadeChange = false;
369 if (TLI->shouldCastAtomicStoreInIR(SI) ==
370 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
371 SI = convertAtomicStoreToIntegerType(SI);
372 MadeChange = true;
373 }
374
375 MadeChange |= tryInsertFencesForAtomic(
376 SI, isReleaseOrStronger(SI->getOrdering()), AtomicOrdering::Monotonic);
377
378 MadeChange |= tryExpandAtomicStore(SI);
379 return MadeChange;
380 }
381
382 if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) {
383 if (!atomicSizeSupported(TLI, RMWI)) {
384 expandAtomicRMWToLibcall(RMWI);
385 return true;
386 }
387
388 bool MadeChange = false;
389 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
390 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
391 RMWI = convertAtomicXchgToIntegerType(RMWI);
392 MadeChange = true;
393 }
394
395 MadeChange |= tryInsertFencesForAtomic(
396 RMWI,
397 isReleaseOrStronger(RMWI->getOrdering()) ||
398 isAcquireOrStronger(RMWI->getOrdering()),
400
401 // There are two different ways of expanding RMW instructions:
402 // - into a load if it is idempotent
403 // - into a Cmpxchg/LL-SC loop otherwise
404 // we try them in that order.
405 MadeChange |= (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
406 tryExpandAtomicRMW(RMWI);
407 return MadeChange;
408 }
409
410 if (auto *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
411 if (!atomicSizeSupported(TLI, CASI)) {
412 expandAtomicCASToLibcall(CASI);
413 return true;
414 }
415
416 // TODO: when we're ready to make the change at the IR level, we can
417 // extend convertCmpXchgToInteger for floating point too.
418 bool MadeChange = false;
419 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
420 // TODO: add a TLI hook to control this so that each target can
421 // convert to lowering the original type one at a time.
422 CASI = convertCmpXchgToIntegerType(CASI);
423 MadeChange = true;
424 }
425
426 auto CmpXchgExpansion = TLI->shouldExpandAtomicCmpXchgInIR(CASI);
427 if (TLI->shouldInsertFencesForAtomic(CASI)) {
428 if (CmpXchgExpansion == TargetLoweringBase::AtomicExpansionKind::None &&
429 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
430 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
431 isAcquireOrStronger(CASI->getFailureOrdering()))) {
432 // If a compare and swap is lowered to LL/SC, we can do smarter fence
433 // insertion, with a stronger one on the success path than on the
434 // failure path. As a result, fence insertion is directly done by
435 // expandAtomicCmpXchg in that case.
436 AtomicOrdering FenceOrdering = CASI->getMergedOrdering();
437 AtomicOrdering CASOrdering =
439 CASI->setSuccessOrdering(CASOrdering);
440 CASI->setFailureOrdering(CASOrdering);
441 MadeChange |= bracketInstWithFences(CASI, FenceOrdering);
442 }
443 } else if (CmpXchgExpansion !=
444 TargetLoweringBase::AtomicExpansionKind::LLSC) {
445 // CmpXchg LLSC is handled in expandAtomicCmpXchg().
446 MadeChange |= tryInsertTrailingSeqCstFence(CASI);
447 }
448
449 MadeChange |= tryExpandAtomicCmpXchg(CASI);
450 return MadeChange;
451 }
452
453 return false;
454}
455
456bool AtomicExpandImpl::run(
457 Function &F, const LibcallLoweringModuleAnalysisResult &LibcallResult,
458 const TargetMachine *TM) {
459 const auto *Subtarget = TM->getSubtargetImpl(F);
460 if (!Subtarget->enableAtomicExpand())
461 return false;
462 TLI = Subtarget->getTargetLowering();
463 LibcallLowering = &LibcallResult.getLibcallLowering(*Subtarget);
464 DL = &F.getDataLayout();
465
466 bool MadeChange = false;
467
468 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
469 BasicBlock *BB = &*BBI;
470
472
473 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
474 I = Next) {
475 Instruction &Inst = *I;
476 Next = std::next(I);
477
478 if (processAtomicInstr(&Inst)) {
479 MadeChange = true;
480
481 // New blocks may have been inserted.
482 BBE = F.end();
483 }
484 }
485 }
486
487 return MadeChange;
488}
489
490bool AtomicExpandLegacy::runOnFunction(Function &F) {
491
492 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
493 if (!TPC)
494 return false;
495 auto *TM = &TPC->getTM<TargetMachine>();
496
497 const LibcallLoweringModuleAnalysisResult &LibcallResult =
498 getAnalysis<LibcallLoweringInfoWrapper>().getResult(*F.getParent());
499 AtomicExpandImpl AE;
500 return AE.run(F, LibcallResult, TM);
501}
502
504 return new AtomicExpandLegacy();
505}
506
509 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
510
511 const LibcallLoweringModuleAnalysisResult *LibcallResult =
512 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
513
514 if (!LibcallResult) {
515 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
516 "' analysis required");
517 return PreservedAnalyses::all();
518 }
519
520 AtomicExpandImpl AE;
521
522 bool Changed = AE.run(F, *LibcallResult, TM);
523 if (!Changed)
524 return PreservedAnalyses::all();
525
527}
528
529bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
530 AtomicOrdering Order) {
531 ReplacementIRBuilder Builder(I, *DL);
532
533 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
534
535 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
536 // We have a guard here because not every atomic operation generates a
537 // trailing fence.
538 if (TrailingFence)
539 TrailingFence->moveAfter(I);
540
541 return (LeadingFence || TrailingFence);
542}
543
544/// Get the iX type with the same bitwidth as T.
546AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
547 EVT VT = TLI->getMemValueType(DL, T);
548 unsigned BitWidth = VT.getStoreSizeInBits();
549 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
550 return IntegerType::get(T->getContext(), BitWidth);
551}
552
553/// Convert an atomic load of a non-integral type to an integer load of the
554/// equivalent bitwidth. See the function comment on
555/// convertAtomicStoreToIntegerType for background.
556LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
557 auto *M = LI->getModule();
558 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
559
560 ReplacementIRBuilder Builder(LI, *DL);
561
562 Value *Addr = LI->getPointerOperand();
563
564 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
565 NewLI->setAlignment(LI->getAlign());
566 NewLI->setVolatile(LI->isVolatile());
567 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
568 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
569
570 Value *NewVal = LI->getType()->isPtrOrPtrVectorTy()
571 ? Builder.CreateIntToPtr(NewLI, LI->getType())
572 : Builder.CreateBitCast(NewLI, LI->getType());
573 LI->replaceAllUsesWith(NewVal);
574 LI->eraseFromParent();
575 return NewLI;
576}
577
578AtomicRMWInst *
579AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
581
582 auto *M = RMWI->getModule();
583 Type *NewTy =
584 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
585
586 ReplacementIRBuilder Builder(RMWI, *DL);
587
588 Value *Addr = RMWI->getPointerOperand();
589 Value *Val = RMWI->getValOperand();
590 Value *NewVal = Val->getType()->isPointerTy()
591 ? Builder.CreatePtrToInt(Val, NewTy)
592 : Builder.CreateBitCast(Val, NewTy);
593
594 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
595 RMWI->getAlign(), RMWI->getOrdering(),
596 RMWI->getSyncScopeID());
597 NewRMWI->setVolatile(RMWI->isVolatile());
598 copyMetadataForAtomic(*NewRMWI, *RMWI);
599 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
600
601 Value *NewRVal = RMWI->getType()->isPointerTy()
602 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
603 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
604 RMWI->replaceAllUsesWith(NewRVal);
605 RMWI->eraseFromParent();
606 return NewRMWI;
607}
608
609bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
610 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
611 case TargetLoweringBase::AtomicExpansionKind::None:
612 return false;
613 case TargetLoweringBase::AtomicExpansionKind::LLSC:
614 expandAtomicOpToLLSC(
615 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
616 LI->getOrdering(),
617 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
618 return true;
619 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
620 return expandAtomicLoadToLL(LI);
621 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
622 return expandAtomicLoadToCmpXchg(LI);
623 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
624 LI->setAtomic(AtomicOrdering::NotAtomic);
625 return true;
626 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
627 TLI->emitExpandAtomicLoad(LI);
628 return true;
629 default:
630 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
631 }
632}
633
634bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
635 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
636 case TargetLoweringBase::AtomicExpansionKind::None:
637 return false;
638 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
639 TLI->emitExpandAtomicStore(SI);
640 return true;
641 case TargetLoweringBase::AtomicExpansionKind::Expand:
642 expandAtomicStoreToXChg(SI);
643 return true;
644 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
645 SI->setAtomic(AtomicOrdering::NotAtomic);
646 return true;
647 default:
648 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
649 }
650}
651
652bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
653 ReplacementIRBuilder Builder(LI, *DL);
654
655 // On some architectures, load-linked instructions are atomic for larger
656 // sizes than normal loads. For example, the only 64-bit load guaranteed
657 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
658 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
659 LI->getPointerOperand(), LI->getOrdering());
661
662 LI->replaceAllUsesWith(Val);
663 LI->eraseFromParent();
664
665 return true;
666}
667
668bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
669 ReplacementIRBuilder Builder(LI, *DL);
670 AtomicOrdering Order = LI->getOrdering();
671 if (Order == AtomicOrdering::Unordered)
672 Order = AtomicOrdering::Monotonic;
673
674 Value *Addr = LI->getPointerOperand();
675 Type *Ty = LI->getType();
676 Constant *DummyVal = Constant::getNullValue(Ty);
677
678 Value *Pair = Builder.CreateAtomicCmpXchg(
679 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
681 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
682
683 LI->replaceAllUsesWith(Loaded);
684 LI->eraseFromParent();
685
686 return true;
687}
688
689/// Convert an atomic store of a non-integral type to an integer store of the
690/// equivalent bitwidth. We used to not support floating point or vector
691/// atomics in the IR at all. The backends learned to deal with the bitcast
692/// idiom because that was the only way of expressing the notion of a atomic
693/// float or vector store. The long term plan is to teach each backend to
694/// instruction select from the original atomic store, but as a migration
695/// mechanism, we convert back to the old format which the backends understand.
696/// Each backend will need individual work to recognize the new format.
697StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
698 ReplacementIRBuilder Builder(SI, *DL);
699 auto *M = SI->getModule();
700 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
701 M->getDataLayout());
702 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
703
704 Value *Addr = SI->getPointerOperand();
705
706 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
707 NewSI->setAlignment(SI->getAlign());
708 NewSI->setVolatile(SI->isVolatile());
709 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
710 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
711 SI->eraseFromParent();
712 return NewSI;
713}
714
715void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
716 // This function is only called on atomic stores that are too large to be
717 // atomic if implemented as a native store. So we replace them by an
718 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
719 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
720 // It is the responsibility of the target to only signal expansion via
721 // shouldExpandAtomicRMW in cases where this is required and possible.
722 ReplacementIRBuilder Builder(SI, *DL);
723 AtomicOrdering Ordering = SI->getOrdering();
724 assert(Ordering != AtomicOrdering::NotAtomic);
725 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
726 ? AtomicOrdering::Monotonic
727 : Ordering;
728 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
729 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
730 SI->getAlign(), RMWOrdering);
731 SI->eraseFromParent();
732
733 // Now we have an appropriate swap instruction, lower it as usual.
734 tryExpandAtomicRMW(AI);
735}
736
737static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
738 Value *Loaded, Value *NewVal, Align AddrAlign,
739 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
740 bool IsVolatile, Value *&Success,
741 Value *&NewLoaded, Instruction *MetadataSrc) {
742 Type *OrigTy = NewVal->getType();
743
744 // This code can go away when cmpxchg supports FP and vector types.
745 assert(!OrigTy->isPointerTy());
746 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
747 if (NeedBitcast) {
748 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
749 NewVal = Builder.CreateBitCast(NewVal, IntTy);
750 Loaded = Builder.CreateBitCast(Loaded, IntTy);
751 }
752
753 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
754 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
756 Pair->setVolatile(IsVolatile);
757 if (MetadataSrc)
758 copyMetadataForAtomic(*Pair, *MetadataSrc);
759
760 Success = Builder.CreateExtractValue(Pair, 1, "success");
761 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
762
763 if (NeedBitcast)
764 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
765}
766
767bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
768 LLVMContext &Ctx = AI->getModule()->getContext();
769 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
770 switch (Kind) {
771 case TargetLoweringBase::AtomicExpansionKind::None:
772 return false;
773 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
774 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
775 unsigned ValueSize = getAtomicOpSize(AI);
776 if (ValueSize < MinCASSize) {
777 expandPartwordAtomicRMW(AI,
778 TargetLoweringBase::AtomicExpansionKind::LLSC);
779 } else {
780 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
781 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
782 AI->getValOperand());
783 };
784 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
785 AI->getAlign(), AI->getOrdering(), PerformOp);
786 }
787 return true;
788 }
789 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
790 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
791 unsigned ValueSize = getAtomicOpSize(AI);
792 if (ValueSize < MinCASSize) {
793 expandPartwordAtomicRMW(AI,
794 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
795 } else {
797 Ctx.getSyncScopeNames(SSNs);
798 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
799 ? "system"
800 : SSNs[AI->getSyncScopeID()];
801 OptimizationRemarkEmitter ORE(AI->getFunction());
802 ORE.emit([&]() {
803 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
804 << "A compare and swap loop was generated for an atomic "
805 << AI->getOperationName(AI->getOperation()) << " operation at "
806 << MemScope << " memory scope";
807 });
808 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
809 }
810 return true;
811 }
812 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
813 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
814 unsigned ValueSize = getAtomicOpSize(AI);
815 if (ValueSize < MinCASSize) {
817 // Widen And/Or/Xor and give the target another chance at expanding it.
820 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
821 return true;
822 }
823 }
824 expandAtomicRMWToMaskedIntrinsic(AI);
825 return true;
826 }
827 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
829 return true;
830 }
831 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
833 return true;
834 }
835 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
836 return lowerAtomicRMWInst(AI);
837 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
838 TLI->emitExpandAtomicRMW(AI);
839 return true;
840 default:
841 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
842 }
843}
844
845namespace {
846
847struct PartwordMaskValues {
848 // These three fields are guaranteed to be set by createMaskInstrs.
849 Type *WordType = nullptr;
850 Type *ValueType = nullptr;
851 Type *IntValueType = nullptr;
852 Value *AlignedAddr = nullptr;
853 Align AlignedAddrAlignment;
854 // The remaining fields can be null.
855 Value *ShiftAmt = nullptr;
856 Value *Mask = nullptr;
857 Value *Inv_Mask = nullptr;
858};
859
860[[maybe_unused]]
861raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
862 auto PrintObj = [&O](auto *V) {
863 if (V)
864 O << *V;
865 else
866 O << "nullptr";
867 O << '\n';
868 };
869 O << "PartwordMaskValues {\n";
870 O << " WordType: ";
871 PrintObj(PMV.WordType);
872 O << " ValueType: ";
873 PrintObj(PMV.ValueType);
874 O << " AlignedAddr: ";
875 PrintObj(PMV.AlignedAddr);
876 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
877 O << " ShiftAmt: ";
878 PrintObj(PMV.ShiftAmt);
879 O << " Mask: ";
880 PrintObj(PMV.Mask);
881 O << " Inv_Mask: ";
882 PrintObj(PMV.Inv_Mask);
883 O << "}\n";
884 return O;
885}
886
887} // end anonymous namespace
888
889/// This is a helper function which builds instructions to provide
890/// values necessary for partword atomic operations. It takes an
891/// incoming address, Addr, and ValueType, and constructs the address,
892/// shift-amounts and masks needed to work with a larger value of size
893/// WordSize.
894///
895/// AlignedAddr: Addr rounded down to a multiple of WordSize
896///
897/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
898/// from AlignAddr for it to have the same value as if
899/// ValueType was loaded from Addr.
900///
901/// Mask: Value to mask with the value loaded from AlignAddr to
902/// include only the part that would've been loaded from Addr.
903///
904/// Inv_Mask: The inverse of Mask.
905static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
907 Value *Addr, Align AddrAlign,
908 unsigned MinWordSize) {
909 PartwordMaskValues PMV;
910
911 Module *M = I->getModule();
912 LLVMContext &Ctx = M->getContext();
913 const DataLayout &DL = M->getDataLayout();
914 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
915
916 PMV.ValueType = PMV.IntValueType = ValueType;
917 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
918 PMV.IntValueType =
919 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
920
921 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
922 : ValueType;
923 if (PMV.ValueType == PMV.WordType) {
924 PMV.AlignedAddr = Addr;
925 PMV.AlignedAddrAlignment = AddrAlign;
926 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
927 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
928 return PMV;
929 }
930
931 PMV.AlignedAddrAlignment = Align(MinWordSize);
932
933 assert(ValueSize < MinWordSize);
934
935 PointerType *PtrTy = cast<PointerType>(Addr->getType());
936 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
937 Value *PtrLSB;
938
939 if (AddrAlign < MinWordSize) {
940 PMV.AlignedAddr = Builder.CreateIntrinsic(
941 Intrinsic::ptrmask, {PtrTy, IntTy},
942 {Addr, ConstantInt::getSigned(IntTy, ~(uint64_t)(MinWordSize - 1))},
943 nullptr, "AlignedAddr");
944
945 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
946 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
947 } else {
948 // If the alignment is high enough, the LSB are known 0.
949 PMV.AlignedAddr = Addr;
950 PtrLSB = ConstantInt::getNullValue(IntTy);
951 }
952
953 if (DL.isLittleEndian()) {
954 // turn bytes into bits
955 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
956 } else {
957 // turn bytes into bits, and count from the other side.
958 PMV.ShiftAmt = Builder.CreateShl(
959 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
960 }
961
962 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
963 PMV.Mask = Builder.CreateShl(
964 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
965 "Mask");
966
967 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
968
969 return PMV;
970}
971
972static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
973 const PartwordMaskValues &PMV) {
974 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
975 if (PMV.WordType == PMV.ValueType)
976 return WideWord;
977
978 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
979 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
980 return Builder.CreateBitCast(Trunc, PMV.ValueType);
981}
982
983static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
984 Value *Updated, const PartwordMaskValues &PMV) {
985 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
986 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
987 if (PMV.WordType == PMV.ValueType)
988 return Updated;
989
990 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
991
992 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
993 Value *Shift =
994 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
995 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
996 Value *Or = Builder.CreateOr(And, Shift, "inserted");
997 return Or;
998}
999
1000/// Emit IR to implement a masked version of a given atomicrmw
1001/// operation. (That is, only the bits under the Mask should be
1002/// affected by the operation)
1004 IRBuilderBase &Builder, Value *Loaded,
1005 Value *Shifted_Inc, Value *Inc,
1006 const PartwordMaskValues &PMV) {
1007 // TODO: update to use
1008 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
1009 // to merge bits from two values without requiring PMV.Inv_Mask.
1010 switch (Op) {
1011 case AtomicRMWInst::Xchg: {
1012 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
1013 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
1014 return FinalVal;
1015 }
1016 case AtomicRMWInst::Or:
1017 case AtomicRMWInst::Xor:
1018 case AtomicRMWInst::And:
1019 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
1020 case AtomicRMWInst::Add:
1021 case AtomicRMWInst::Sub:
1022 case AtomicRMWInst::Nand: {
1023 // The other arithmetic ops need to be masked into place.
1024 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
1025 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
1026 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
1027 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
1028 return FinalVal;
1029 }
1030 case AtomicRMWInst::Max:
1031 case AtomicRMWInst::Min:
1046 // Finally, other ops will operate on the full value, so truncate down to
1047 // the original size, and expand out again after doing the
1048 // operation. Bitcasts will be inserted for FP values.
1049 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
1050 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
1051 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
1052 return FinalVal;
1053 }
1054 default:
1055 llvm_unreachable("Unknown atomic op");
1056 }
1057}
1058
1059/// Expand a sub-word atomicrmw operation into an appropriate
1060/// word-sized operation.
1061///
1062/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
1063/// way as a typical atomicrmw expansion. The only difference here is
1064/// that the operation inside of the loop may operate upon only a
1065/// part of the value.
1066void AtomicExpandImpl::expandPartwordAtomicRMW(
1067 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
1068 // Widen And/Or/Xor and give the target another chance at expanding it.
1072 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
1073 return;
1074 }
1075 AtomicOrdering MemOpOrder = AI->getOrdering();
1076 SyncScope::ID SSID = AI->getSyncScopeID();
1077
1078 ReplacementIRBuilder Builder(AI, *DL);
1079
1080 PartwordMaskValues PMV =
1081 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1082 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1083
1084 Value *ValOperand_Shifted = nullptr;
1087 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
1088 ValOperand_Shifted =
1089 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
1090 "ValOperand_Shifted");
1091 }
1092
1093 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
1094 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
1095 AI->getValOperand(), PMV);
1096 };
1097
1098 Value *OldResult;
1099 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
1100 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1101 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
1102 AI->isVolatile(), PerformPartwordOp,
1104 } else {
1105 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
1106 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1107 PMV.AlignedAddrAlignment, MemOpOrder,
1108 PerformPartwordOp);
1109 }
1110
1111 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1112 AI->replaceAllUsesWith(FinalOldResult);
1113 AI->eraseFromParent();
1114}
1115
1116// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
1117AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
1118 ReplacementIRBuilder Builder(AI, *DL);
1120
1122 Op == AtomicRMWInst::And) &&
1123 "Unable to widen operation");
1124
1125 PartwordMaskValues PMV =
1126 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1127 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1128
1129 Value *ValOperand_Shifted =
1130 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
1131 PMV.ShiftAmt, "ValOperand_Shifted");
1132
1133 Value *NewOperand;
1134
1135 if (Op == AtomicRMWInst::And)
1136 NewOperand =
1137 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
1138 else
1139 NewOperand = ValOperand_Shifted;
1140
1141 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
1142 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1143 AI->getOrdering(), AI->getSyncScopeID());
1144
1145 copyMetadataForAtomic(*NewAI, *AI);
1146
1147 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
1148 AI->replaceAllUsesWith(FinalOldResult);
1149 AI->eraseFromParent();
1150 return NewAI;
1151}
1152
1153bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1154 // The basic idea here is that we're expanding a cmpxchg of a
1155 // smaller memory size up to a word-sized cmpxchg. To do this, we
1156 // need to add a retry-loop for strong cmpxchg, so that
1157 // modifications to other parts of the word don't cause a spurious
1158 // failure.
1159
1160 // This generates code like the following:
1161 // [[Setup mask values PMV.*]]
1162 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1163 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1164 // %InitLoaded = load i32* %addr
1165 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1166 // br partword.cmpxchg.loop
1167 // partword.cmpxchg.loop:
1168 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1169 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1170 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1171 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1172 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
1173 // i32 %FullWord_NewVal success_ordering failure_ordering
1174 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
1175 // %Success = extractvalue { i32, i1 } %NewCI, 1
1176 // br i1 %Success, label %partword.cmpxchg.end,
1177 // label %partword.cmpxchg.failure
1178 // partword.cmpxchg.failure:
1179 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1180 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1181 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1182 // label %partword.cmpxchg.end
1183 // partword.cmpxchg.end:
1184 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1185 // %FinalOldVal = trunc i32 %tmp1 to i8
1186 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
1187 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
1188
1189 Value *Addr = CI->getPointerOperand();
1190 Value *Cmp = CI->getCompareOperand();
1191 Value *NewVal = CI->getNewValOperand();
1192
1193 BasicBlock *BB = CI->getParent();
1194 Function *F = BB->getParent();
1195 ReplacementIRBuilder Builder(CI, *DL);
1196 LLVMContext &Ctx = Builder.getContext();
1197
1198 BasicBlock *EndBB =
1199 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1200 auto FailureBB =
1201 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1202 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1203
1204 // The split call above "helpfully" added a branch at the end of BB
1205 // (to the wrong place).
1206 std::prev(BB->end())->eraseFromParent();
1207 Builder.SetInsertPoint(BB);
1208
1209 PartwordMaskValues PMV =
1210 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1211 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1212
1213 // Shift the incoming values over, into the right location in the word.
1214 Value *NewVal_Shifted =
1215 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1216 Value *Cmp_Shifted =
1217 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1218
1219 // Load the entire current word, and mask into place the expected and new
1220 // values
1221 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1222 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1223 Builder.CreateBr(LoopBB);
1224
1225 // partword.cmpxchg.loop:
1226 Builder.SetInsertPoint(LoopBB);
1227 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1228 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1229
1230 // The initial load must be atomic with the same synchronization scope
1231 // to avoid a data race with concurrent stores. If the instruction being
1232 // emulated is volatile, issue a volatile load.
1233 // addIncoming is done first so that any replaceAllUsesWith calls during
1234 // normalization correctly update the PHI incoming value.
1235 InitLoaded->setVolatile(CI->isVolatile());
1237 InitLoaded->setAtomic(AtomicOrdering::Monotonic, CI->getSyncScopeID());
1238 // The newly created load might need to be lowered further. Because it is
1239 // created in the same block as the atomicrmw, the AtomicExpand loop will
1240 // not process it again.
1241 processAtomicInstr(InitLoaded);
1242 }
1243
1244 // Mask/Or the expected and new values into place in the loaded word.
1245 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1246 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1247 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1248 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1250 NewCI->setVolatile(CI->isVolatile());
1251 // When we're building a strong cmpxchg, we need a loop, so you
1252 // might think we could use a weak cmpxchg inside. But, using strong
1253 // allows the below comparison for ShouldContinue, and we're
1254 // expecting the underlying cmpxchg to be a machine instruction,
1255 // which is strong anyways.
1256 NewCI->setWeak(CI->isWeak());
1257
1258 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1259 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1260
1261 if (CI->isWeak())
1262 Builder.CreateBr(EndBB);
1263 else
1264 Builder.CreateCondBr(Success, EndBB, FailureBB);
1265
1266 // partword.cmpxchg.failure:
1267 Builder.SetInsertPoint(FailureBB);
1268 // Upon failure, verify that the masked-out part of the loaded value
1269 // has been modified. If it didn't, abort the cmpxchg, since the
1270 // masked-in part must've.
1271 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1272 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1273 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1274
1275 // Add the second value to the phi from above
1276 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1277
1278 // partword.cmpxchg.end:
1279 Builder.SetInsertPoint(CI);
1280
1281 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1282 Value *Res = PoisonValue::get(CI->getType());
1283 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1284 Res = Builder.CreateInsertValue(Res, Success, 1);
1285
1286 CI->replaceAllUsesWith(Res);
1287 CI->eraseFromParent();
1288 return true;
1289}
1290
1291void AtomicExpandImpl::expandAtomicOpToLLSC(
1292 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1293 AtomicOrdering MemOpOrder,
1294 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1295 ReplacementIRBuilder Builder(I, *DL);
1296 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1297 MemOpOrder, PerformOp);
1298
1299 I->replaceAllUsesWith(Loaded);
1300 I->eraseFromParent();
1301}
1302
1303void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1304 ReplacementIRBuilder Builder(AI, *DL);
1305
1306 PartwordMaskValues PMV =
1307 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1308 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1309
1310 // The value operand must be sign-extended for signed min/max so that the
1311 // target's signed comparison instructions can be used. Otherwise, just
1312 // zero-ext.
1313 Instruction::CastOps CastOp = Instruction::ZExt;
1314 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1315 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1316 CastOp = Instruction::SExt;
1317
1318 Value *ValOperand_Shifted = Builder.CreateShl(
1319 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1320 PMV.ShiftAmt, "ValOperand_Shifted");
1321 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1322 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1323 AI->getOrdering());
1324 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1325 AI->replaceAllUsesWith(FinalOldResult);
1326 AI->eraseFromParent();
1327}
1328
1329void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1330 AtomicCmpXchgInst *CI) {
1331 ReplacementIRBuilder Builder(CI, *DL);
1332
1333 PartwordMaskValues PMV = createMaskInstrs(
1334 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1335 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1336
1337 Value *CmpVal_Shifted = Builder.CreateShl(
1338 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1339 "CmpVal_Shifted");
1340 Value *NewVal_Shifted = Builder.CreateShl(
1341 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1342 "NewVal_Shifted");
1344 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1345 CI->getMergedOrdering());
1346 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1347 Value *Res = PoisonValue::get(CI->getType());
1348 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1349 Value *Success = Builder.CreateICmpEQ(
1350 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1351 Res = Builder.CreateInsertValue(Res, Success, 1);
1352
1353 CI->replaceAllUsesWith(Res);
1354 CI->eraseFromParent();
1355}
1356
1357Value *AtomicExpandImpl::insertRMWLLSCLoop(
1358 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1359 AtomicOrdering MemOpOrder,
1360 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1361 LLVMContext &Ctx = Builder.getContext();
1362 BasicBlock *BB = Builder.GetInsertBlock();
1363 Function *F = BB->getParent();
1364
1365 assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
1366 "Expected at least natural alignment at this point.");
1367
1368 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1369 //
1370 // The standard expansion we produce is:
1371 // [...]
1372 // atomicrmw.start:
1373 // %loaded = @load.linked(%addr)
1374 // %new = some_op iN %loaded, %incr
1375 // %stored = @store_conditional(%new, %addr)
1376 // %try_again = icmp i32 ne %stored, 0
1377 // br i1 %try_again, label %loop, label %atomicrmw.end
1378 // atomicrmw.end:
1379 // [...]
1380 BasicBlock *ExitBB =
1381 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1382 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1383
1384 // The split call above "helpfully" added a branch at the end of BB (to the
1385 // wrong place).
1386 std::prev(BB->end())->eraseFromParent();
1387 Builder.SetInsertPoint(BB);
1388 Builder.CreateBr(LoopBB);
1389
1390 // Start the main loop block now that we've taken care of the preliminaries.
1391 Builder.SetInsertPoint(LoopBB);
1392 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1393
1394 Value *NewVal = PerformOp(Builder, Loaded);
1395
1396 Value *StoreSuccess =
1397 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1398 Value *TryAgain = Builder.CreateICmpNE(
1399 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1400
1401 Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1402
1403 // Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is
1404 // hard to predict precise branch weigths we mark the branch as "unknown"
1405 // (50/50) to prevent misleading optimizations.
1407
1408 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1409 return Loaded;
1410}
1411
1412/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1413/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1414/// IR. As a migration step, we convert back to what use to be the standard
1415/// way to represent a pointer cmpxchg so that we can update backends one by
1416/// one.
1417AtomicCmpXchgInst *
1418AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1419 auto *M = CI->getModule();
1420 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1421 M->getDataLayout());
1422
1423 ReplacementIRBuilder Builder(CI, *DL);
1424
1425 Value *Addr = CI->getPointerOperand();
1426
1427 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1428 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1429
1430 auto *NewCI = Builder.CreateAtomicCmpXchg(
1431 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1432 CI->getFailureOrdering(), CI->getSyncScopeID());
1433 NewCI->setVolatile(CI->isVolatile());
1434 NewCI->setWeak(CI->isWeak());
1435 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1436
1437 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1438 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1439
1440 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1441
1442 Value *Res = PoisonValue::get(CI->getType());
1443 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1444 Res = Builder.CreateInsertValue(Res, Succ, 1);
1445
1446 CI->replaceAllUsesWith(Res);
1447 CI->eraseFromParent();
1448 return NewCI;
1449}
1450
1451bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1452 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1453 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1454 Value *Addr = CI->getPointerOperand();
1455 BasicBlock *BB = CI->getParent();
1456 Function *F = BB->getParent();
1457 LLVMContext &Ctx = F->getContext();
1458 // If shouldInsertFencesForAtomic() returns true, then the target does not
1459 // want to deal with memory orders, and emitLeading/TrailingFence should take
1460 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1461 // should preserve the ordering.
1462 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1463 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1464 ? AtomicOrdering::Monotonic
1465 : CI->getMergedOrdering();
1466
1467 // In implementations which use a barrier to achieve release semantics, we can
1468 // delay emitting this barrier until we know a store is actually going to be
1469 // attempted. The cost of this delay is that we need 2 copies of the block
1470 // emitting the load-linked, affecting code size.
1471 //
1472 // Ideally, this logic would be unconditional except for the minsize check
1473 // since in other cases the extra blocks naturally collapse down to the
1474 // minimal loop. Unfortunately, this puts too much stress on later
1475 // optimisations so we avoid emitting the extra logic in those cases too.
1476 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1477 SuccessOrder != AtomicOrdering::Monotonic &&
1478 SuccessOrder != AtomicOrdering::Acquire &&
1479 !F->hasMinSize();
1480
1481 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1482 // do it even on minsize.
1483 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1484
1485 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1486 //
1487 // The full expansion we produce is:
1488 // [...]
1489 // %aligned.addr = ...
1490 // cmpxchg.start:
1491 // %unreleasedload = @load.linked(%aligned.addr)
1492 // %unreleasedload.extract = extract value from %unreleasedload
1493 // %should_store = icmp eq %unreleasedload.extract, %desired
1494 // br i1 %should_store, label %cmpxchg.releasingstore,
1495 // label %cmpxchg.nostore
1496 // cmpxchg.releasingstore:
1497 // fence?
1498 // br label cmpxchg.trystore
1499 // cmpxchg.trystore:
1500 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1501 // [%releasedload, %cmpxchg.releasedload]
1502 // %updated.new = insert %new into %loaded.trystore
1503 // %stored = @store_conditional(%updated.new, %aligned.addr)
1504 // %success = icmp eq i32 %stored, 0
1505 // br i1 %success, label %cmpxchg.success,
1506 // label %cmpxchg.releasedload/%cmpxchg.failure
1507 // cmpxchg.releasedload:
1508 // %releasedload = @load.linked(%aligned.addr)
1509 // %releasedload.extract = extract value from %releasedload
1510 // %should_store = icmp eq %releasedload.extract, %desired
1511 // br i1 %should_store, label %cmpxchg.trystore,
1512 // label %cmpxchg.failure
1513 // cmpxchg.success:
1514 // fence?
1515 // br label %cmpxchg.end
1516 // cmpxchg.nostore:
1517 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1518 // [%releasedload,
1519 // %cmpxchg.releasedload/%cmpxchg.trystore]
1520 // @load_linked_fail_balance()?
1521 // br label %cmpxchg.failure
1522 // cmpxchg.failure:
1523 // fence?
1524 // br label %cmpxchg.end
1525 // cmpxchg.end:
1526 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1527 // [%loaded.trystore, %cmpxchg.trystore]
1528 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1529 // %loaded = extract value from %loaded.exit
1530 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1531 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1532 // [...]
1533 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1534 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1535 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1536 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1537 auto ReleasedLoadBB =
1538 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1539 auto TryStoreBB =
1540 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1541 auto ReleasingStoreBB =
1542 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1543 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1544
1545 ReplacementIRBuilder Builder(CI, *DL);
1546
1547 // The split call above "helpfully" added a branch at the end of BB (to the
1548 // wrong place), but we might want a fence too. It's easiest to just remove
1549 // the branch entirely.
1550 std::prev(BB->end())->eraseFromParent();
1551 Builder.SetInsertPoint(BB);
1552 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1553 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1554
1555 PartwordMaskValues PMV =
1556 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1557 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1558 Builder.CreateBr(StartBB);
1559
1560 // Start the main loop block now that we've taken care of the preliminaries.
1561 Builder.SetInsertPoint(StartBB);
1562 Value *UnreleasedLoad =
1563 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1564 Value *UnreleasedLoadExtract =
1565 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1566 Value *ShouldStore = Builder.CreateICmpEQ(
1567 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1568
1569 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1570 // jump straight past that fence instruction (if it exists).
1571 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
1572 MDBuilder(F->getContext()).createLikelyBranchWeights());
1573
1574 Builder.SetInsertPoint(ReleasingStoreBB);
1575 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1576 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1577 Builder.CreateBr(TryStoreBB);
1578
1579 Builder.SetInsertPoint(TryStoreBB);
1580 PHINode *LoadedTryStore =
1581 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1582 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1583 Value *NewValueInsert =
1584 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1585 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1586 PMV.AlignedAddr, MemOpOrder);
1587 StoreSuccess = Builder.CreateICmpEQ(
1588 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1589 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1590 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1591 CI->isWeak() ? FailureBB : RetryBB,
1592 MDBuilder(F->getContext()).createLikelyBranchWeights());
1593
1594 Builder.SetInsertPoint(ReleasedLoadBB);
1595 Value *SecondLoad;
1596 if (HasReleasedLoadBB) {
1597 SecondLoad =
1598 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1599 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1600 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1601 CI->getCompareOperand(), "should_store");
1602
1603 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1604 // jump straight past that fence instruction (if it exists).
1605 Builder.CreateCondBr(
1606 ShouldStore, TryStoreBB, NoStoreBB,
1607 MDBuilder(F->getContext()).createLikelyBranchWeights());
1608 // Update PHI node in TryStoreBB.
1609 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1610 } else
1611 Builder.CreateUnreachable();
1612
1613 // Make sure later instructions don't get reordered with a fence if
1614 // necessary.
1615 Builder.SetInsertPoint(SuccessBB);
1616 if (ShouldInsertFencesForAtomic ||
1618 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1619 Builder.CreateBr(ExitBB);
1620
1621 Builder.SetInsertPoint(NoStoreBB);
1622 PHINode *LoadedNoStore =
1623 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1624 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1625 if (HasReleasedLoadBB)
1626 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1627
1628 // In the failing case, where we don't execute the store-conditional, the
1629 // target might want to balance out the load-linked with a dedicated
1630 // instruction (e.g., on ARM, clearing the exclusive monitor).
1632 Builder.CreateBr(FailureBB);
1633
1634 Builder.SetInsertPoint(FailureBB);
1635 PHINode *LoadedFailure =
1636 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1637 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1638 if (CI->isWeak())
1639 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1640 if (ShouldInsertFencesForAtomic)
1641 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1642 Builder.CreateBr(ExitBB);
1643
1644 // Finally, we have control-flow based knowledge of whether the cmpxchg
1645 // succeeded or not. We expose this to later passes by converting any
1646 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1647 // PHI.
1648 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1649 PHINode *LoadedExit =
1650 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1651 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1652 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1653 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1654 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1655 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1656
1657 // This is the "exit value" from the cmpxchg expansion. It may be of
1658 // a type wider than the one in the cmpxchg instruction.
1659 Value *LoadedFull = LoadedExit;
1660
1661 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1662 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1663
1664 // Look for any users of the cmpxchg that are just comparing the loaded value
1665 // against the desired one, and replace them with the CFG-derived version.
1667 for (auto *User : CI->users()) {
1668 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1669 if (!EV)
1670 continue;
1671
1672 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1673 "weird extraction from { iN, i1 }");
1674
1675 if (EV->getIndices()[0] == 0)
1676 EV->replaceAllUsesWith(Loaded);
1677 else
1679
1680 PrunedInsts.push_back(EV);
1681 }
1682
1683 // We can remove the instructions now we're no longer iterating through them.
1684 for (auto *EV : PrunedInsts)
1685 EV->eraseFromParent();
1686
1687 if (!CI->use_empty()) {
1688 // Some use of the full struct return that we don't understand has happened,
1689 // so we've got to reconstruct it properly.
1690 Value *Res;
1691 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1692 Res = Builder.CreateInsertValue(Res, Success, 1);
1693
1694 CI->replaceAllUsesWith(Res);
1695 }
1696
1697 CI->eraseFromParent();
1698 return true;
1699}
1700
1701bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1702 // TODO: Add floating point support.
1703 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1704 if (!C)
1705 return false;
1706
1707 switch (RMWI->getOperation()) {
1708 case AtomicRMWInst::Add:
1709 case AtomicRMWInst::Sub:
1710 case AtomicRMWInst::Or:
1711 case AtomicRMWInst::Xor:
1712 return C->isZero();
1713 case AtomicRMWInst::And:
1714 return C->isMinusOne();
1715 case AtomicRMWInst::Min:
1716 return C->isMaxValue(true);
1717 case AtomicRMWInst::Max:
1718 return C->isMinValue(true);
1720 return C->isMaxValue(false);
1722 return C->isMinValue(false);
1723 default:
1724 return false;
1725 }
1726}
1727
1728bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1729 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1730 tryExpandAtomicLoad(ResultingLoad);
1731 return true;
1732 }
1733 return false;
1734}
1735
1736Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1737 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1738 AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile,
1739 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1740 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1741 LLVMContext &Ctx = Builder.getContext();
1742 BasicBlock *BB = Builder.GetInsertBlock();
1743 Function *F = BB->getParent();
1744
1745 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1746 //
1747 // The standard expansion we produce is:
1748 // [...]
1749 // %init_loaded = load atomic iN* %addr
1750 // br label %loop
1751 // loop:
1752 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1753 // %new = some_op iN %loaded, %incr
1754 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1755 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1756 // %success = extractvalue { iN, i1 } %pair, 1
1757 // br i1 %success, label %atomicrmw.end, label %loop
1758 // atomicrmw.end:
1759 // [...]
1760 BasicBlock *ExitBB =
1761 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1762 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1763
1764 // The split call above "helpfully" added a branch at the end of BB (to the
1765 // wrong place), but we want a load. It's easiest to just remove
1766 // the branch entirely.
1767 std::prev(BB->end())->eraseFromParent();
1768 Builder.SetInsertPoint(BB);
1769 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1770 Builder.CreateBr(LoopBB);
1771
1772 // Start the main loop block now that we've taken care of the preliminaries.
1773 Builder.SetInsertPoint(LoopBB);
1774 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1775 Loaded->addIncoming(InitLoaded, BB);
1776
1777 // The initial load must be atomic with the same synchronization scope
1778 // to avoid a data race with concurrent stores. If the instruction being
1779 // emulated is volatile, issue a volatile load.
1780 // addIncoming is done first so that any replaceAllUsesWith calls during
1781 // normalization correctly update the PHI incoming value.
1782 InitLoaded->setVolatile(IsVolatile);
1784 InitLoaded->setAtomic(AtomicOrdering::Monotonic, SSID);
1785 // The newly created load might need to be lowered further. Because it is
1786 // created in the same block as the atomicrmw, the AtomicExpand loop will
1787 // not process it again.
1788 processAtomicInstr(InitLoaded);
1789 }
1790
1791 Value *NewVal = PerformOp(Builder, Loaded);
1792
1793 Value *NewLoaded = nullptr;
1794 Value *Success = nullptr;
1795
1796 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1797 MemOpOrder == AtomicOrdering::Unordered
1798 ? AtomicOrdering::Monotonic
1799 : MemOpOrder,
1800 SSID, IsVolatile, Success, NewLoaded, MetadataSrc);
1801 assert(Success && NewLoaded);
1802
1803 Loaded->addIncoming(NewLoaded, LoopBB);
1804
1805 Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);
1806
1807 // Atomic RMW expands to a cmpxchg loop, Since precise branch weights
1808 // cannot be easily determined here, we mark the branch as "unknown" (50/50)
1809 // to prevent misleading optimizations.
1811
1812 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1813 return NewLoaded;
1814}
1815
1816bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1817 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1818 unsigned ValueSize = getAtomicOpSize(CI);
1819
1820 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1821 default:
1822 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1823 case TargetLoweringBase::AtomicExpansionKind::None:
1824 if (ValueSize < MinCASSize)
1825 return expandPartwordCmpXchg(CI);
1826 return false;
1827 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1828 return expandAtomicCmpXchg(CI);
1829 }
1830 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1831 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1832 return true;
1833 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1834 return lowerAtomicCmpXchgInst(CI);
1835 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1836 TLI->emitExpandAtomicCmpXchg(CI);
1837 return true;
1838 }
1839 }
1840}
1841
1842bool AtomicExpandImpl::expandAtomicRMWToCmpXchg(
1843 AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) {
1844 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1845 Builder.setIsFPConstrained(
1846 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1847
1848 // FIXME: If FP exceptions are observable, we should force them off for the
1849 // loop for the FP atomics.
1850 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1851 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1852 AI->getOrdering(), AI->getSyncScopeID(), AI->isVolatile(),
1853 [&](IRBuilderBase &Builder, Value *Loaded) {
1854 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1855 AI->getValOperand());
1856 },
1857 CreateCmpXchg, /*MetadataSrc=*/AI);
1858
1859 AI->replaceAllUsesWith(Loaded);
1860 AI->eraseFromParent();
1861 return true;
1862}
1863
1864// In order to use one of the sized library calls such as
1865// __atomic_fetch_add_4, the alignment must be sufficient, the size
1866// must be one of the potentially-specialized sizes, and the value
1867// type must actually exist in C on the target (otherwise, the
1868// function wouldn't actually be defined.)
1869static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1870 const DataLayout &DL) {
1871 // TODO: "LargestSize" is an approximation for "largest type that
1872 // you can express in C". It seems to be the case that int128 is
1873 // supported on all 64-bit platforms, otherwise only up to 64-bit
1874 // integers are supported. If we get this wrong, then we'll try to
1875 // call a sized libcall that doesn't actually exist. There should
1876 // really be some more reliable way in LLVM of determining integer
1877 // sizes which are valid in the target's C ABI...
1878 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1879 return Alignment >= Size &&
1880 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1881 Size <= LargestSize;
1882}
1883
1884void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1885 static const RTLIB::Libcall Libcalls[6] = {
1886 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1887 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1888 unsigned Size = getAtomicOpSize(I);
1889
1890 bool Expanded = expandAtomicOpToLibcall(
1891 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1892 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1893 if (!Expanded)
1894 handleUnsupportedAtomicSize(I, "atomic load");
1895}
1896
1897void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1898 static const RTLIB::Libcall Libcalls[6] = {
1899 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1900 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1901 unsigned Size = getAtomicOpSize(I);
1902
1903 bool Expanded = expandAtomicOpToLibcall(
1904 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1905 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1906 if (!Expanded)
1907 handleUnsupportedAtomicSize(I, "atomic store");
1908}
1909
1910void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
1911 const Twine &AtomicOpName,
1912 Instruction *DiagnosticInst) {
1913 static const RTLIB::Libcall Libcalls[6] = {
1914 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1915 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1916 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1917 unsigned Size = getAtomicOpSize(I);
1918
1919 bool Expanded = expandAtomicOpToLibcall(
1920 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1921 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1922 Libcalls);
1923 if (!Expanded)
1924 handleUnsupportedAtomicSize(I, AtomicOpName, DiagnosticInst);
1925}
1926
1928 static const RTLIB::Libcall LibcallsXchg[6] = {
1929 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1930 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1931 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1932 static const RTLIB::Libcall LibcallsAdd[6] = {
1933 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1934 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1935 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1936 static const RTLIB::Libcall LibcallsSub[6] = {
1937 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1938 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1939 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1940 static const RTLIB::Libcall LibcallsAnd[6] = {
1941 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1942 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1943 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1944 static const RTLIB::Libcall LibcallsOr[6] = {
1945 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1946 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1947 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1948 static const RTLIB::Libcall LibcallsXor[6] = {
1949 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1950 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1951 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1952 static const RTLIB::Libcall LibcallsNand[6] = {
1953 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1954 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1955 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1956
1957 switch (Op) {
1959 llvm_unreachable("Should not have BAD_BINOP.");
1961 return ArrayRef(LibcallsXchg);
1962 case AtomicRMWInst::Add:
1963 return ArrayRef(LibcallsAdd);
1964 case AtomicRMWInst::Sub:
1965 return ArrayRef(LibcallsSub);
1966 case AtomicRMWInst::And:
1967 return ArrayRef(LibcallsAnd);
1968 case AtomicRMWInst::Or:
1969 return ArrayRef(LibcallsOr);
1970 case AtomicRMWInst::Xor:
1971 return ArrayRef(LibcallsXor);
1973 return ArrayRef(LibcallsNand);
1974 case AtomicRMWInst::Max:
1975 case AtomicRMWInst::Min:
1990 // No atomic libcalls are available for these.
1991 return {};
1992 }
1993 llvm_unreachable("Unexpected AtomicRMW operation.");
1994}
1995
1996void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1997 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1998
1999 unsigned Size = getAtomicOpSize(I);
2000
2001 bool Success = false;
2002 if (!Libcalls.empty())
2003 Success = expandAtomicOpToLibcall(
2004 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
2005 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
2006
2007 // The expansion failed: either there were no libcalls at all for
2008 // the operation (min/max), or there were only size-specialized
2009 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
2010 // CAS libcall, via a CAS loop, instead.
2011 if (!Success) {
2012 expandAtomicRMWToCmpXchg(
2013 I, [this, I](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
2014 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
2015 SyncScope::ID SSID, bool IsVolatile, Value *&Success,
2016 Value *&NewLoaded, Instruction *MetadataSrc) {
2017 // Create the CAS instruction normally...
2018 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
2019 Addr, Loaded, NewVal, Alignment, MemOpOrder,
2021 Pair->setVolatile(IsVolatile);
2022 if (MetadataSrc)
2023 copyMetadataForAtomic(*Pair, *MetadataSrc);
2024
2025 Success = Builder.CreateExtractValue(Pair, 1, "success");
2026 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
2027
2028 // ...and then expand the CAS into a libcall.
2029 expandAtomicCASToLibcall(
2030 Pair,
2031 "atomicrmw " + AtomicRMWInst::getOperationName(I->getOperation()),
2032 MetadataSrc);
2033 });
2034 }
2035}
2036
2037// A helper routine for the above expandAtomic*ToLibcall functions.
2038//
2039// 'Libcalls' contains an array of enum values for the particular
2040// ATOMIC libcalls to be emitted. All of the other arguments besides
2041// 'I' are extracted from the Instruction subclass by the
2042// caller. Depending on the particular call, some will be null.
2043bool AtomicExpandImpl::expandAtomicOpToLibcall(
2044 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
2045 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
2046 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
2047 assert(Libcalls.size() == 6);
2048
2049 LLVMContext &Ctx = I->getContext();
2050 Module *M = I->getModule();
2051 const DataLayout &DL = M->getDataLayout();
2052 IRBuilder<> Builder(I);
2053 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
2054
2055 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
2056 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
2057
2058 if (M->getTargetTriple().isOSWindows() && M->getTargetTriple().isX86_64() &&
2059 Size == 16) {
2060 // x86_64 Windows passes i128 as an XMM vector; on return, it is in
2061 // XMM0, and as a parameter, it is passed indirectly. The generic lowering
2062 // rules handles this correctly if we pass it as a v2i64 rather than
2063 // i128. This is what Clang does in the frontend for such types as well
2064 // (see WinX86_64ABIInfo::classify in Clang).
2065 SizedIntTy = FixedVectorType::get(Type::getInt64Ty(Ctx), 2);
2066 }
2067
2068 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
2069
2070 // TODO: the "order" argument type is "int", not int32. So
2071 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
2072 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
2073 Constant *OrderingVal =
2074 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
2075 Constant *Ordering2Val = nullptr;
2076 if (CASExpected) {
2077 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
2078 Ordering2Val =
2079 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
2080 }
2081 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
2082
2083 RTLIB::Libcall RTLibType;
2084 if (UseSizedLibcall) {
2085 switch (Size) {
2086 case 1:
2087 RTLibType = Libcalls[1];
2088 break;
2089 case 2:
2090 RTLibType = Libcalls[2];
2091 break;
2092 case 4:
2093 RTLibType = Libcalls[3];
2094 break;
2095 case 8:
2096 RTLibType = Libcalls[4];
2097 break;
2098 case 16:
2099 RTLibType = Libcalls[5];
2100 break;
2101 }
2102 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
2103 RTLibType = Libcalls[0];
2104 } else {
2105 // Can't use sized function, and there's no generic for this
2106 // operation, so give up.
2107 return false;
2108 }
2109
2110 RTLIB::LibcallImpl LibcallImpl = LibcallLowering->getLibcallImpl(RTLibType);
2111 if (LibcallImpl == RTLIB::Unsupported) {
2112 // This target does not implement the requested atomic libcall so give up.
2113 return false;
2114 }
2115
2116 // Build up the function call. There's two kinds. First, the sized
2117 // variants. These calls are going to be one of the following (with
2118 // N=1,2,4,8,16):
2119 // iN __atomic_load_N(iN *ptr, int ordering)
2120 // void __atomic_store_N(iN *ptr, iN val, int ordering)
2121 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
2122 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
2123 // int success_order, int failure_order)
2124 //
2125 // Note that these functions can be used for non-integer atomic
2126 // operations, the values just need to be bitcast to integers on the
2127 // way in and out.
2128 //
2129 // And, then, the generic variants. They look like the following:
2130 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
2131 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
2132 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
2133 // int ordering)
2134 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
2135 // void *desired, int success_order,
2136 // int failure_order)
2137 //
2138 // The different signatures are built up depending on the
2139 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
2140 // variables.
2141
2142 AllocaInst *AllocaCASExpected = nullptr;
2143 AllocaInst *AllocaValue = nullptr;
2144 AllocaInst *AllocaResult = nullptr;
2145
2146 Type *ResultTy;
2148 AttributeList Attr;
2149
2150 // 'size' argument.
2151 if (!UseSizedLibcall) {
2152 // Note, getIntPtrType is assumed equivalent to size_t.
2153 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
2154 }
2155
2156 // 'ptr' argument.
2157 // note: This assumes all address spaces share a common libfunc
2158 // implementation and that addresses are convertable. For systems without
2159 // that property, we'd need to extend this mechanism to support AS-specific
2160 // families of atomic intrinsics.
2161 Value *PtrVal = PointerOperand;
2162 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
2163 Args.push_back(PtrVal);
2164
2165 // 'expected' argument, if present.
2166 if (CASExpected) {
2167 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
2168 AllocaCASExpected->setAlignment(AllocaAlignment);
2169 Builder.CreateLifetimeStart(AllocaCASExpected);
2170 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
2171 Args.push_back(AllocaCASExpected);
2172 }
2173
2174 // 'val' argument ('desired' for cas), if present.
2175 if (ValueOperand) {
2176 if (UseSizedLibcall) {
2177 Value *IntValue =
2178 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
2179 Args.push_back(IntValue);
2180 } else {
2181 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
2182 AllocaValue->setAlignment(AllocaAlignment);
2183 Builder.CreateLifetimeStart(AllocaValue);
2184 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
2185 Args.push_back(AllocaValue);
2186 }
2187 }
2188
2189 // 'ret' argument.
2190 if (!CASExpected && HasResult && !UseSizedLibcall) {
2191 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
2192 AllocaResult->setAlignment(AllocaAlignment);
2193 Builder.CreateLifetimeStart(AllocaResult);
2194 Args.push_back(AllocaResult);
2195 }
2196
2197 // 'ordering' ('success_order' for cas) argument.
2198 Args.push_back(OrderingVal);
2199
2200 // 'failure_order' argument, if present.
2201 if (Ordering2Val)
2202 Args.push_back(Ordering2Val);
2203
2204 // Now, the return type.
2205 if (CASExpected) {
2206 ResultTy = Type::getInt1Ty(Ctx);
2207 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2208 } else if (HasResult && UseSizedLibcall)
2209 ResultTy = SizedIntTy;
2210 else
2211 ResultTy = Type::getVoidTy(Ctx);
2212
2213 // Done with setting up arguments and return types, create the call:
2215 for (Value *Arg : Args)
2216 ArgTys.push_back(Arg->getType());
2217 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2218 FunctionCallee LibcallFn = M->getOrInsertFunction(
2220 Attr);
2221 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2222 Call->setAttributes(Attr);
2223 Value *Result = Call;
2224
2225 // And then, extract the results...
2226 if (ValueOperand && !UseSizedLibcall)
2227 Builder.CreateLifetimeEnd(AllocaValue);
2228
2229 if (CASExpected) {
2230 // The final result from the CAS is {load of 'expected' alloca, bool result
2231 // from call}
2232 Type *FinalResultTy = I->getType();
2233 Value *V = PoisonValue::get(FinalResultTy);
2234 Value *ExpectedOut = Builder.CreateAlignedLoad(
2235 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2236 Builder.CreateLifetimeEnd(AllocaCASExpected);
2237 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2238 V = Builder.CreateInsertValue(V, Result, 1);
2240 } else if (HasResult) {
2241 Value *V;
2242 if (UseSizedLibcall) {
2243 // Add bitcasts from Result's scalar type to I's <n x ptr> vector type
2244 auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
2245 auto *VTy = dyn_cast<VectorType>(I->getType());
2246 if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
2247 unsigned AS = PtrTy->getAddressSpace();
2248 Value *BC = Builder.CreateBitCast(
2249 Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
2250 V = Builder.CreateIntToPtr(BC, I->getType());
2251 } else
2252 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2253 } else {
2254 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2255 AllocaAlignment);
2256 Builder.CreateLifetimeEnd(AllocaResult);
2257 }
2258 I->replaceAllUsesWith(V);
2259 }
2260 I->eraseFromParent();
2261 return true;
2262}
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile, Value *&Success, Value *&NewLoaded, Instruction *MetadataSrc)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
Expand Atomic static false unsigned getAtomicOpSize(LoadInst *LI)
static void writeUnsupportedAtomicSizeReason(const TargetLowering *TLI, Inst *I, raw_ostream &OS)
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void copyMetadataForAtomic(Instruction &Dest, const Instruction &Source)
Copy metadata that's safe to preserve when widening atomics.
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
static bool isIdempotentRMW(AtomicRMWInst &RMWI)
Return true if and only if the given instruction does not modify the memory location referenced.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
#define T
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
This file defines the SmallString class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
void setAlignment(Align Align)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
void setWeak(bool IsWeak)
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ FSub
*p = old - v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static LLVM_ABI StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
reverse_iterator rbegin()
Definition BasicBlock.h:477
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
reverse_iterator rend()
Definition BasicBlock.h:479
void setAttributes(AttributeList A)
Set the attributes for this call.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ArrayRef< unsigned > getIndices() const
unsigned getNumIndices() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
BasicBlockListType::iterator iterator
Definition Function.h:70
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:1969
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2681
LLVM_ABI CallInst * CreateLifetimeStart(Value *Ptr)
Create a lifetime.start intrinsic.
LLVM_ABI CallInst * CreateLifetimeEnd(Value *Ptr)
Create a lifetime.end intrinsic.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1935
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1238
UnreachableInst * CreateUnreachable()
Definition IRBuilder.h:1380
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2674
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2237
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition IRBuilder.h:2276
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2378
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1232
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2324
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2539
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2374
void setIsFPConstrained(bool IsCon)
Enable/Disable use of constrained floating point math.
Definition IRBuilder.h:358
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2242
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1918
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1533
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2120
LLVMContext & getContext() const
Definition IRBuilder.h:203
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1592
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2232
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2553
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition IRBuilder.h:1954
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1614
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2247
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, bool Elementwise=false)
Definition IRBuilder.h:1982
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
LLVM_ABI void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
Tracks which library functions to use for a particular subtarget.
LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
Record a mapping from subtarget to LibcallLoweringInfo.
const LibcallLoweringInfo & getLibcallLowering(const TargetSubtargetInfo &Subtarget) const
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Metadata node.
Definition Metadata.h:1080
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
LLVMContext & getContext() const
Get the global data context.
Definition Module.h:287
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
virtual Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const
Perform a store-conditional operation to Addr.
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform a bit test atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
virtual bool shouldInsertFencesForAtomic(const Instruction *I) const
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
virtual AtomicOrdering atomicOperationOrderAfterFenceSplit(const Instruction *I) const
virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const
Perform a cmpxchg expansion using a target-specific method.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const
Perform a masked atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const
Perform a atomicrmw expansion using a target-specific way.
virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const
virtual void emitExpandAtomicStore(StoreInst *SI) const
Perform a atomic store using a target-specific way.
virtual AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const
Returns how the given atomic atomicrmw should be cast by the IR-level AtomicExpand pass.
virtual bool shouldInsertTrailingSeqCstFenceForAtomicStore(const Instruction *I) const
Whether AtomicExpandPass should automatically insert a seq_cst trailing fence without reducing the or...
virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const
Perform a masked cmpxchg using a target-specific intrinsic.
virtual bool shouldIssueAtomicLoadForAtomicEmulationLoop(void) const
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual void emitExpandAtomicLoad(LoadInst *LI) const
Perform a atomic load using a target-specific way.
virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform a atomicrmw which the result is only used by comparison, using a target-specific intrinsic.
virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
virtual Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
Inserts in the IR a target-specific intrinsic specifying a fence.
virtual LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const
On some platforms, an AtomicRMW that never actually modifies the value (such as fetch_add of 0) can b...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:287
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool canInstructionHaveMMRAs(const Instruction &I)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
AtomicOrdering
Atomic ordering for LLVM's memory model.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replace atomic instructions with __atomic_* library calls,...
LLVM_ABI char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:420
Matching combinators.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.