LLVM 23.0.0git
AtomicExpandPass.cpp
Go to the documentation of this file.
1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target-specific instructions which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/Instruction.h"
38#include "llvm/IR/MDBuilder.h"
40#include "llvm/IR/Module.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
46#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
/// Implementation of the atomic-expansion transform. The pass entry points in
/// this file construct one instance and call run() on a function.
64class AtomicExpandImpl {
// Target and layout state. All three pointers are assigned in run() before
// any instruction is processed.
65 const TargetLowering *TLI = nullptr;
66 const LibcallLoweringInfo *LibcallLowering = nullptr;
67 const DataLayout *DL = nullptr;
68
69private:
70 /// Callback type for emitting a cmpxchg instruction during RMW expansion.
71 /// Parameters: (Builder, Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
72 /// SSID, IsVolatile, /* OUT */ Success, /* OUT */ NewLoaded,
73 /// MetadataSrc)
74 using CreateCmpXchgInstFun = function_ref<void(
76 SyncScope::ID, bool, Value *&, Value *&, Instruction *)>;
77
// Emits an error through the LLVMContext, attributed to DiagnosticInst when
// one is supplied and to FailedInst otherwise. FailedInst is then removed:
// if it produces a value its uses are replaced with poison, and the
// instruction is erased. Callers must not use FailedInst afterwards.
78 void handleFailure(Instruction &FailedInst, const Twine &Msg,
79 Instruction *DiagnosticInst = nullptr) const {
80 LLVMContext &Ctx = FailedInst.getContext();
81
82 // TODO: Do not use generic error type.
83 Ctx.emitError(DiagnosticInst ? DiagnosticInst : &FailedInst, Msg);
84
85 if (!FailedInst.getType()->isVoidTy())
86 FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
87 FailedInst.eraseFromParent();
88 }
89
// Reports (through handleFailure) an atomic operation whose size or
// alignment is not supported.
90 template <typename Inst>
91 void handleUnsupportedAtomicSize(Inst *I, const Twine &AtomicOpName,
92 Instruction *DiagnosticInst = nullptr) const;
93
// Fence insertion helpers. Each returns true if it changed the IR.
94 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
95 bool tryInsertTrailingSeqCstFence(Instruction *AtomicI);
96 template <typename AtomicInst>
97 bool tryInsertFencesForAtomic(AtomicInst *AtomicI, bool OrderingRequiresFence,
98 AtomicOrdering NewOrdering);
// Type coercion and per-instruction-kind expansion helpers.
99 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
100 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
101 bool tryExpandAtomicLoad(LoadInst *LI);
102 bool expandAtomicLoadToLL(LoadInst *LI);
103 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
104 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
105 bool tryExpandAtomicStore(StoreInst *SI);
106 void expandAtomicStoreToXChg(StoreInst *SI);
107 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
108 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
109 Value *
110 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
111 Align AddrAlign, AtomicOrdering MemOpOrder,
112 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
113 void expandAtomicOpToLLSC(
114 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
115 AtomicOrdering MemOpOrder,
116 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
117 void expandPartwordAtomicRMW(
119 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
120 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
121 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
122 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
123
124 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
125 Value *insertRMWCmpXchgLoop(
126 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
127 AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile,
128 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
129 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
130 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
131
132 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
133 bool isIdempotentRMW(AtomicRMWInst *RMWI);
134 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
135
// Lowering to __atomic_* library calls, used by processAtomicInstr when the
// operation's size or alignment is not supported.
136 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
137 Value *PointerOperand, Value *ValueOperand,
138 Value *CASExpected, AtomicOrdering Ordering,
139 AtomicOrdering Ordering2,
140 ArrayRef<RTLIB::Libcall> Libcalls);
141 void expandAtomicLoadToLibcall(LoadInst *LI);
142 void expandAtomicStoreToLibcall(StoreInst *LI);
143 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
144 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
145 const Twine &AtomicOpName = "cmpxchg",
146 Instruction *DiagnosticInst = nullptr);
147
148 bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
149 CreateCmpXchgInstFun CreateCmpXchg);
150
// Handles a single instruction; returns true if the IR was changed.
151 bool processAtomicInstr(Instruction *I);
152
153public:
// Entry point: processes every instruction of F. Returns true if F changed.
154 bool run(Function &F,
155 const LibcallLoweringModuleAnalysisResult &LibcallResult,
156 const TargetMachine *TM);
157};
158
/// Legacy pass-manager wrapper around AtomicExpandImpl. runOnFunction()
/// creates an AtomicExpandImpl and forwards the function to its run().
159class AtomicExpandLegacy : public FunctionPass {
160public:
161 static char ID; // Pass identification, replacement for typeid
162
163 AtomicExpandLegacy() : FunctionPass(ID) {}
164
165 void getAnalysisUsage(AnalysisUsage &AU) const override {
168 }
169
// Returns true if the function was modified.
170 bool runOnFunction(Function &F) override;
171};
172
173// IRBuilder to be used for replacement atomic instructions.
// It is positioned at the instruction being replaced and carries that
// instruction's !pcsections and !mmra metadata over to what it creates.
174struct ReplacementIRBuilder
175 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
// !mmra metadata read from the instruction passed to the constructor
// (whatever getMetadata returned for it).
176 MDNode *MMRAMD = nullptr;
177
178 // Preserves the DebugLoc from I, and preserves still-valid metadata.
179 // Enables StrictFP builder mode when the enclosing function has the
// StrictFP attribute.
180 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
181 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
183 [this](Instruction *I) { addMMRAMD(I); })) {
184 SetInsertPoint(I);
185 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
186 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
187 this->setIsFPConstrained(true);
188
189 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
190 }
191
// Inserter callback target: sets I's !mmra metadata to the captured MMRAMD.
192 void addMMRAMD(Instruction *I) {
194 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
195 }
196};
197
198} // end anonymous namespace
199
// Storage for the legacy pass's identification object.
200char AtomicExpandLegacy::ID = 0;
201
// Reference in namespace llvm that aliases the legacy pass's ID.
202char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
203
205 "Expand Atomic instructions", false, false)
208INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
209 "Expand Atomic instructions", false, false)
210
211// Helper functions to retrieve the size of atomic instructions.
212static unsigned getAtomicOpSize(LoadInst *LI) {
213 const DataLayout &DL = LI->getDataLayout();
214 return DL.getTypeStoreSize(LI->getType());
215}
216
217static unsigned getAtomicOpSize(StoreInst *SI) {
218 const DataLayout &DL = SI->getDataLayout();
219 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
220}
221
222static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
223 const DataLayout &DL = RMWI->getDataLayout();
224 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
225}
226
227static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
228 const DataLayout &DL = CASI->getDataLayout();
229 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
230}
231
232/// Copy metadata that's safe to preserve when widening atomics.
/// Only an allow-list of metadata kinds is transferred from Source to Dest:
/// the fixed kinds named in the switch below, plus the two "amdgpu.no.*"
/// kinds looked up by name. Every other kind on Source is not copied.
234 const Instruction &Source) {
236 Source.getAllMetadata(MD);
237 LLVMContext &Ctx = Dest.getContext();
// NOTE(review): MDB is not referenced in the lines visible here - confirm
// whether it is still needed.
238 MDBuilder MDB(Ctx);
239
240 for (auto [ID, N] : MD) {
241 switch (ID) {
242 case LLVMContext::MD_dbg:
243 case LLVMContext::MD_tbaa:
244 case LLVMContext::MD_tbaa_struct:
245 case LLVMContext::MD_alias_scope:
246 case LLVMContext::MD_noalias:
247 case LLVMContext::MD_noalias_addrspace:
248 case LLVMContext::MD_access_group:
249 case LLVMContext::MD_mmra:
250 Dest.setMetadata(ID, N);
251 break;
252 default:
// These kinds have no fixed enumerator, so they are matched by looking up
// their registered kind ID by name.
253 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
254 Dest.setMetadata(ID, N);
255 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
256 Dest.setMetadata(ID, N);
257
258 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
259 // uses.
260 break;
261 }
262 }
263}
264
265template <typename Inst>
266static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
267 unsigned Size = getAtomicOpSize(I);
268 Align Alignment = I->getAlign();
269 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
270 return Alignment >= Size && Size <= MaxSize;
271}
272
273template <typename Inst>
275 raw_ostream &OS) {
276 unsigned Size = getAtomicOpSize(I);
277 Align Alignment = I->getAlign();
278 bool NeedSeparator = false;
279
280 if (Alignment < Size) {
281 OS << "instruction alignment " << Alignment.value()
282 << " is smaller than the required " << Size
283 << "-byte alignment for this atomic operation";
284 NeedSeparator = true;
285 }
286
287 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
288 if (Size > MaxSize) {
289 if (NeedSeparator)
290 OS << "; ";
291 OS << "target supports atomics up to " << MaxSize
292 << " bytes, but this atomic accesses " << Size << " bytes";
293 }
294}
295
/// Reports \p I as an atomic operation of unsupported size/alignment.
/// The message passed to handleFailure has the form
/// "unsupported <AtomicOpName>: <FailureReason>"; handleFailure then emits
/// the error (on DiagnosticInst if given) and erases I.
/// Must only be called when atomicSizeSupported(TLI, I) is false.
296template <typename Inst>
297void AtomicExpandImpl::handleUnsupportedAtomicSize(
298 Inst *I, const Twine &AtomicOpName, Instruction *DiagnosticInst) const {
299 assert(!atomicSizeSupported(TLI, I) && "expected unsupported atomic size");
// FailureReason is filled through the stream OS that wraps it.
300 SmallString<128> FailureReason;
301 raw_svector_ostream OS(FailureReason);
303 handleFailure(*I, Twine("unsupported ") + AtomicOpName + ": " + FailureReason,
304 DiagnosticInst);
305}
306
/// Asks the target to emit a sequentially-consistent trailing fence for
/// \p AtomicI. If the target produces one, it is moved to directly after
/// AtomicI. Returns true iff a fence was inserted.
307bool AtomicExpandImpl::tryInsertTrailingSeqCstFence(Instruction *AtomicI) {
309 return false;
310
// The builder inserts before AtomicI, hence the moveAfter below.
311 IRBuilder Builder(AtomicI);
312 if (auto *TrailingFence = TLI->emitTrailingFence(
313 Builder, AtomicI, AtomicOrdering::SequentiallyConsistent)) {
314 TrailingFence->moveAfter(AtomicI);
315 return true;
316 }
317 return false;
318}
319
320template <typename AtomicInst>
321bool AtomicExpandImpl::tryInsertFencesForAtomic(AtomicInst *AtomicI,
322 bool OrderingRequiresFence,
323 AtomicOrdering NewOrdering) {
324 bool ShouldInsertFences = TLI->shouldInsertFencesForAtomic(AtomicI);
325 if (OrderingRequiresFence && ShouldInsertFences) {
326 AtomicOrdering FenceOrdering = AtomicI->getOrdering();
327 AtomicI->setOrdering(NewOrdering);
328 return bracketInstWithFences(AtomicI, FenceOrdering);
329 }
330 if (!ShouldInsertFences)
331 return tryInsertTrailingSeqCstFence(AtomicI);
332 return false;
333}
334
/// Processes one instruction, dispatching on whether it is a load, store,
/// atomicrmw or cmpxchg. Non-atomic loads/stores and all other instruction
/// kinds are left alone. For each handled kind the steps are, in order:
///  1. operations whose size/alignment is unsupported go to a libcall;
///  2. the instruction may be rewritten to an integer-typed equivalent;
///  3. fences are inserted where the target asks for them;
///  4. the target-selected expansion is applied.
/// Returns true if the IR was changed.
335bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
336 if (auto *LI = dyn_cast<LoadInst>(I)) {
337 if (!LI->isAtomic())
338 return false;
339
340 if (!atomicSizeSupported(TLI, LI)) {
341 expandAtomicLoadToLibcall(LI);
342 return true;
343 }
344
345 bool MadeChange = false;
346 if (TLI->shouldCastAtomicLoadInIR(LI) ==
347 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
// From here on LI refers to the replacement integer load.
348 LI = convertAtomicLoadToIntegerType(LI);
349 MadeChange = true;
350 }
351
352 MadeChange |= tryInsertFencesForAtomic(
353 LI, isAcquireOrStronger(LI->getOrdering()), AtomicOrdering::Monotonic);
354
355 MadeChange |= tryExpandAtomicLoad(LI);
356 return MadeChange;
357 }
358
359 if (auto *SI = dyn_cast<StoreInst>(I)) {
360 if (!SI->isAtomic())
361 return false;
362
363 if (!atomicSizeSupported(TLI, SI)) {
364 expandAtomicStoreToLibcall(SI);
365 return true;
366 }
367
368 bool MadeChange = false;
369 if (TLI->shouldCastAtomicStoreInIR(SI) ==
370 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
// From here on SI refers to the replacement integer store.
371 SI = convertAtomicStoreToIntegerType(SI);
372 MadeChange = true;
373 }
374
375 MadeChange |= tryInsertFencesForAtomic(
376 SI, isReleaseOrStronger(SI->getOrdering()), AtomicOrdering::Monotonic);
377
378 MadeChange |= tryExpandAtomicStore(SI);
379 return MadeChange;
380 }
381
382 if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) {
383 if (!atomicSizeSupported(TLI, RMWI)) {
384 expandAtomicRMWToLibcall(RMWI);
385 return true;
386 }
387
388 bool MadeChange = false;
389 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
390 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
391 RMWI = convertAtomicXchgToIntegerType(RMWI);
392 MadeChange = true;
393 }
394
// An atomicrmw both reads and writes, so either a release or an acquire
// component in its ordering makes fences relevant.
395 MadeChange |= tryInsertFencesForAtomic(
396 RMWI,
397 isReleaseOrStronger(RMWI->getOrdering()) ||
398 isAcquireOrStronger(RMWI->getOrdering()),
400
401 // There are two different ways of expanding RMW instructions:
402 // - into a load if it is idempotent
403 // - into a Cmpxchg/LL-SC loop otherwise
404 // we try them in that order.
405 MadeChange |= (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
406 tryExpandAtomicRMW(RMWI);
407 return MadeChange;
408 }
409
410 if (auto *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
411 if (!atomicSizeSupported(TLI, CASI)) {
412 expandAtomicCASToLibcall(CASI);
413 return true;
414 }
415
416 // TODO: when we're ready to make the change at the IR level, we can
417 // extend convertCmpXchgToInteger for floating point too.
418 bool MadeChange = false;
419 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
420 // TODO: add a TLI hook to control this so that each target can
421 // convert to lowering the original type one at a time.
422 CASI = convertCmpXchgToIntegerType(CASI);
423 MadeChange = true;
424 }
425
426 auto CmpXchgExpansion = TLI->shouldExpandAtomicCmpXchgInIR(CASI);
427 if (TLI->shouldInsertFencesForAtomic(CASI)) {
428 if (CmpXchgExpansion == TargetLoweringBase::AtomicExpansionKind::None &&
429 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
430 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
431 isAcquireOrStronger(CASI->getFailureOrdering()))) {
432 // If a compare and swap is lowered to LL/SC, we can do smarter fence
433 // insertion, with a stronger one on the success path than on the
434 // failure path. As a result, fence insertion is directly done by
435 // expandAtomicCmpXchg in that case.
// Here (no expansion requested) the merged ordering moves onto the fences
// and both cmpxchg orderings are replaced by CASOrdering.
436 AtomicOrdering FenceOrdering = CASI->getMergedOrdering();
437 AtomicOrdering CASOrdering =
439 CASI->setSuccessOrdering(CASOrdering);
440 CASI->setFailureOrdering(CASOrdering);
441 MadeChange |= bracketInstWithFences(CASI, FenceOrdering);
442 }
443 } else if (CmpXchgExpansion !=
444 TargetLoweringBase::AtomicExpansionKind::LLSC) {
445 // CmpXchg LLSC is handled in expandAtomicCmpXchg().
446 MadeChange |= tryInsertTrailingSeqCstFence(CASI);
447 }
448
449 MadeChange |= tryExpandAtomicCmpXchg(CASI);
450 return MadeChange;
451 }
452
453 return false;
454}
455
/// Runs atomic expansion over every instruction of \p F.
/// Returns false without touching F when the subtarget reports that atomic
/// expansion is disabled; otherwise returns true iff any instruction was
/// changed. TLI, LibcallLowering and DL are (re)initialized on each call.
456bool AtomicExpandImpl::run(
457 Function &F, const LibcallLoweringModuleAnalysisResult &LibcallResult,
458 const TargetMachine *TM) {
459 const auto *Subtarget = TM->getSubtargetImpl(F);
460 if (!Subtarget->enableAtomicExpand())
461 return false;
462 TLI = Subtarget->getTargetLowering();
463 LibcallLowering = &LibcallResult.getLibcallLowering(*Subtarget);
464 DL = &F.getDataLayout();
465
466 bool MadeChange = false;
467
468 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
469 BasicBlock *BB = &*BBI;
470
472
// Instructions of a block are visited back to front.
473 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
474 I = Next) {
475 Instruction &Inst = *I;
// Compute the successor before processing: processAtomicInstr may erase
// or replace Inst.
476 Next = std::next(I);
477
478 if (processAtomicInstr(&Inst)) {
479 MadeChange = true;
480
481 // New blocks may have been inserted.
482 BBE = F.end();
483 }
484 }
485 }
486
487 return MadeChange;
488}
489
490bool AtomicExpandLegacy::runOnFunction(Function &F) {
491
492 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
493 if (!TPC)
494 return false;
495 auto *TM = &TPC->getTM<TargetMachine>();
496
497 const LibcallLoweringModuleAnalysisResult &LibcallResult =
498 getAnalysis<LibcallLoweringInfoWrapper>().getResult(*F.getParent());
499 AtomicExpandImpl AE;
500 return AE.run(F, LibcallResult, TM);
501}
502
504 return new AtomicExpandLegacy();
505}
506
509 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
510
511 const LibcallLoweringModuleAnalysisResult *LibcallResult =
512 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
513
514 if (!LibcallResult) {
515 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
516 "' analysis required");
517 return PreservedAnalyses::all();
518 }
519
520 AtomicExpandImpl AE;
521
522 bool Changed = AE.run(F, *LibcallResult, TM);
523 if (!Changed)
524 return PreservedAnalyses::all();
525
527}
528
529bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
530 AtomicOrdering Order) {
531 ReplacementIRBuilder Builder(I, *DL);
532
533 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
534
535 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
536 // We have a guard here because not every atomic operation generates a
537 // trailing fence.
538 if (TrailingFence)
539 TrailingFence->moveAfter(I);
540
541 return (LeadingFence || TrailingFence);
542}
543
544/// Get the iX type with the same bitwidth as T.
/// The width used is the store size, in bits, of the memory value type the
/// target reports for \p T under \p DL.
546AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
547 EVT VT = TLI->getMemValueType(DL, T);
548 unsigned BitWidth = VT.getStoreSizeInBits();
// The condition requires the store size to equal the exact bit size of VT.
// NOTE(review): the message says "power of two", which is not literally
// what is checked - confirm the intended wording.
549 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
550 return IntegerType::get(T->getContext(), BitWidth);
551}
552
553/// Convert an atomic load of a non-integral type to an integer load of the
554/// equivalent bitwidth. See the function comment on
555/// convertAtomicStoreToIntegerType for background.
556LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
557 auto *M = LI->getModule();
558 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
559
560 ReplacementIRBuilder Builder(LI, *DL);
561
562 Value *Addr = LI->getPointerOperand();
563
564 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
565 NewLI->setAlignment(LI->getAlign());
566 NewLI->setVolatile(LI->isVolatile());
567 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
568 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
569
570 Value *NewVal = LI->getType()->isPtrOrPtrVectorTy()
571 ? Builder.CreateIntToPtr(NewLI, LI->getType())
572 : Builder.CreateBitCast(NewLI, LI->getType());
573 LI->replaceAllUsesWith(NewVal);
574 LI->eraseFromParent();
575 return NewLI;
576}
577
/// Rewrites \p RMWI as an xchg on the integer type of matching width.
/// The value operand is converted to the integer type (ptrtoint for
/// pointers, bitcast otherwise), a new atomicrmw xchg is created with the
/// same alignment, ordering, sync scope and volatility, and the result is
/// converted back to the original type. RMWI's uses are redirected and RMWI
/// is erased. Returns the new atomicrmw.
578AtomicRMWInst *
579AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
581
582 auto *M = RMWI->getModule();
583 Type *NewTy =
584 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
585
586 ReplacementIRBuilder Builder(RMWI, *DL);
587
588 Value *Addr = RMWI->getPointerOperand();
589 Value *Val = RMWI->getValOperand();
590 Value *NewVal = Val->getType()->isPointerTy()
591 ? Builder.CreatePtrToInt(Val, NewTy)
592 : Builder.CreateBitCast(Val, NewTy);
593
// The operation is always created as Xchg, whatever RMWI's operation was.
594 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
595 RMWI->getAlign(), RMWI->getOrdering(),
596 RMWI->getSyncScopeID());
597 NewRMWI->setVolatile(RMWI->isVolatile());
598 copyMetadataForAtomic(*NewRMWI, *RMWI);
599 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
600
601 Value *NewRVal = RMWI->getType()->isPointerTy()
602 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
603 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
604 RMWI->replaceAllUsesWith(NewRVal);
605 RMWI->eraseFromParent();
606 return NewRMWI;
607}
608
609bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
610 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
611 case TargetLoweringBase::AtomicExpansionKind::None:
612 return false;
613 case TargetLoweringBase::AtomicExpansionKind::LLSC:
614 expandAtomicOpToLLSC(
615 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
616 LI->getOrdering(),
617 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
618 return true;
619 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
620 return expandAtomicLoadToLL(LI);
621 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
622 return expandAtomicLoadToCmpXchg(LI);
623 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
624 LI->setAtomic(AtomicOrdering::NotAtomic);
625 return true;
626 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
627 TLI->emitExpandAtomicLoad(LI);
628 return true;
629 default:
630 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
631 }
632}
633
634bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
635 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
636 case TargetLoweringBase::AtomicExpansionKind::None:
637 return false;
638 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
639 TLI->emitExpandAtomicStore(SI);
640 return true;
641 case TargetLoweringBase::AtomicExpansionKind::Expand:
642 expandAtomicStoreToXChg(SI);
643 return true;
644 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
645 SI->setAtomic(AtomicOrdering::NotAtomic);
646 return true;
647 default:
648 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
649 }
650}
651
/// Replaces the atomic load \p LI with the value produced by the target's
/// load-linked emission (same type, address and ordering), then erases LI.
/// Always returns true.
652bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
653 ReplacementIRBuilder Builder(LI, *DL);
654
655 // On some architectures, load-linked instructions are atomic for larger
656 // sizes than normal loads. For example, the only 64-bit load guaranteed
657 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
658 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
659 LI->getPointerOperand(), LI->getOrdering());
661
662 LI->replaceAllUsesWith(Val);
663 LI->eraseFromParent();
664
665 return true;
666}
667
/// Replaces the atomic load \p LI with a cmpxchg that uses the same null
/// value as both the expected and the new value, and takes element 0 of the
/// cmpxchg result as the loaded value. LI's alignment, sync scope and
/// volatility are carried over. LI is erased. Always returns true.
668bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
669 ReplacementIRBuilder Builder(LI, *DL);
670 AtomicOrdering Order = LI->getOrdering();
// Unordered is upgraded to monotonic for the cmpxchg (presumably because
// cmpxchg has no unordered form - confirm).
671 if (Order == AtomicOrdering::Unordered)
672 Order = AtomicOrdering::Monotonic;
673
674 Value *Addr = LI->getPointerOperand();
675 Type *Ty = LI->getType();
676
677 // cmpxchg supports only integer and pointer operands. If the load type is
678 // FP or vector, run the cmpxchg on the same-sized integer and bitcast the
679 // result back; mirrors createCmpXchgInstFun.
680 bool NeedBitcast = Ty->isFloatingPointTy() || Ty->isVectorTy();
681 Type *CmpXchgTy = Ty;
682 if (NeedBitcast)
683 CmpXchgTy = Builder.getIntNTy(Ty->getPrimitiveSizeInBits());
684 Constant *DummyVal = Constant::getNullValue(CmpXchgTy);
685
686 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
687 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
689 LI->getSyncScopeID());
690 Pair->setVolatile(LI->isVolatile());
691 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
692 if (NeedBitcast)
693 Loaded = Builder.CreateBitCast(Loaded, Ty);
694
695 LI->replaceAllUsesWith(Loaded);
696 LI->eraseFromParent();
697
698 return true;
699}
700
701/// Convert an atomic store of a non-integral type to an integer store of the
702/// equivalent bitwidth. We used to not support floating point or vector
703/// atomics in the IR at all. The backends learned to deal with the bitcast
704/// idiom because that was the only way of expressing the notion of a atomic
705/// float or vector store. The long term plan is to teach each backend to
706/// instruction select from the original atomic store, but as a migration
707/// mechanism, we convert back to the old format which the backends understand.
708/// Each backend will need individual work to recognize the new format.
709StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
710 ReplacementIRBuilder Builder(SI, *DL);
711 auto *M = SI->getModule();
712 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
713 M->getDataLayout());
714 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
715
716 Value *Addr = SI->getPointerOperand();
717
718 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
719 NewSI->setAlignment(SI->getAlign());
720 NewSI->setVolatile(SI->isVolatile());
721 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
722 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
723 SI->eraseFromParent();
724 return NewSI;
725}
726
727void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
728 // This function is only called on atomic stores that are too large to be
729 // atomic if implemented as a native store. So we replace them by an
730 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
731 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
732 // It is the responsibility of the target to only signal expansion via
733 // shouldExpandAtomicRMW in cases where this is required and possible.
734 ReplacementIRBuilder Builder(SI, *DL);
735 AtomicOrdering Ordering = SI->getOrdering();
736 assert(Ordering != AtomicOrdering::NotAtomic);
737 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
738 ? AtomicOrdering::Monotonic
739 : Ordering;
740 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
741 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
742 SI->getAlign(), RMWOrdering, SI->getSyncScopeID());
743 AI->setVolatile(SI->isVolatile());
744 SI->eraseFromParent();
745
746 // Now we have an appropriate swap instruction, lower it as usual.
747 tryExpandAtomicRMW(AI);
748}
749
/// Emits a cmpxchg at \p Addr that expects \p Loaded and stores \p NewVal.
/// Outputs: \p Success receives element 1 of the cmpxchg result and
/// \p NewLoaded element 0. When the value type is floating-point or vector,
/// both operands are bitcast to the same-width integer type first and
/// NewLoaded is bitcast back to the original type. Pointer-typed values are
/// rejected by assertion. If \p MetadataSrc is non-null, its metadata is
/// copied to the cmpxchg via copyMetadataForAtomic.
750static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
751 Value *Loaded, Value *NewVal, Align AddrAlign,
752 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
753 bool IsVolatile, Value *&Success,
754 Value *&NewLoaded, Instruction *MetadataSrc) {
755 Type *OrigTy = NewVal->getType();
756
757 // This code can go away when cmpxchg supports FP and vector types.
758 assert(!OrigTy->isPointerTy());
759 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
760 if (NeedBitcast) {
761 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
762 NewVal = Builder.CreateBitCast(NewVal, IntTy);
763 Loaded = Builder.CreateBitCast(Loaded, IntTy);
764 }
765
766 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
767 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
769 Pair->setVolatile(IsVolatile);
770 if (MetadataSrc)
771 copyMetadataForAtomic(*Pair, *MetadataSrc);
772
773 Success = Builder.CreateExtractValue(Pair, 1, "success");
774 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
775
776 if (NeedBitcast)
777 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
778}
779
/// Expands the atomicrmw \p AI using the strategy the target selects through
/// shouldExpandAtomicRMWInIR. For the LLSC, CmpXChg and MaskedIntrinsic
/// strategies, operations narrower than the target's minimum cmpxchg size
/// take a partword path. Returns false only when the target requests no
/// expansion (or, for NotAtomic, whatever lowerAtomicRMWInst returns).
780bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
781 LLVMContext &Ctx = AI->getModule()->getContext();
782 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
783 switch (Kind) {
784 case TargetLoweringBase::AtomicExpansionKind::None:
785 return false;
786 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
// Sizes are compared in bytes; the target reports its minimum in bits.
787 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
788 unsigned ValueSize = getAtomicOpSize(AI);
789 if (ValueSize < MinCASSize) {
790 expandPartwordAtomicRMW(AI,
791 TargetLoweringBase::AtomicExpansionKind::LLSC);
792 } else {
793 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
794 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
795 AI->getValOperand());
796 };
797 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
798 AI->getAlign(), AI->getOrdering(), PerformOp);
799 }
800 return true;
801 }
802 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
803 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
804 unsigned ValueSize = getAtomicOpSize(AI);
805 if (ValueSize < MinCASSize) {
806 expandPartwordAtomicRMW(AI,
807 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
808 } else {
// Emit an optimization remark naming the operation and its memory scope
// (an empty scope name is reported as "system") before expanding.
810 Ctx.getSyncScopeNames(SSNs);
811 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
812 ? "system"
813 : SSNs[AI->getSyncScopeID()];
814 OptimizationRemarkEmitter ORE(AI->getFunction());
815 ORE.emit([&]() {
816 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
817 << "A compare and swap loop was generated for an atomic "
818 << AI->getOperationName(AI->getOperation()) << " operation at "
819 << MemScope << " memory scope";
820 });
821 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
822 }
823 return true;
824 }
825 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
826 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
827 unsigned ValueSize = getAtomicOpSize(AI);
828 if (ValueSize < MinCASSize) {
830 // Widen And/Or/Xor and give the target another chance at expanding it.
833 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
834 return true;
835 }
836 }
837 expandAtomicRMWToMaskedIntrinsic(AI);
838 return true;
839 }
840 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
842 return true;
843 }
844 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
846 return true;
847 }
848 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
849 return lowerAtomicRMWInst(AI);
850 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
851 TLI->emitExpandAtomicRMW(AI);
852 return true;
853 default:
854 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
855 }
856}
857
858namespace {
859
860struct PartwordMaskValues {
861 // These three fields are guaranteed to be set by createMaskInstrs.
862 Type *WordType = nullptr;
863 Type *ValueType = nullptr;
864 Type *IntValueType = nullptr;
865 Value *AlignedAddr = nullptr;
866 Align AlignedAddrAlignment;
867 // The remaining fields can be null.
868 Value *ShiftAmt = nullptr;
869 Value *Mask = nullptr;
870 Value *Inv_Mask = nullptr;
871};
872
873[[maybe_unused]]
874raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
875 auto PrintObj = [&O](auto *V) {
876 if (V)
877 O << *V;
878 else
879 O << "nullptr";
880 O << '\n';
881 };
882 O << "PartwordMaskValues {\n";
883 O << " WordType: ";
884 PrintObj(PMV.WordType);
885 O << " ValueType: ";
886 PrintObj(PMV.ValueType);
887 O << " AlignedAddr: ";
888 PrintObj(PMV.AlignedAddr);
889 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
890 O << " ShiftAmt: ";
891 PrintObj(PMV.ShiftAmt);
892 O << " Mask: ";
893 PrintObj(PMV.Mask);
894 O << " Inv_Mask: ";
895 PrintObj(PMV.Inv_Mask);
896 O << "}\n";
897 return O;
898}
899
900} // end anonymous namespace
901
902/// This is a helper function which builds instructions to provide
903/// values necessary for partword atomic operations. It takes an
904/// incoming address, Addr, and ValueType, and constructs the address,
905/// shift-amounts and masks needed to work with a larger value of size
906/// MinWordSize bytes.
907///
908/// AlignedAddr: Addr rounded down to a multiple of MinWordSize
909///
910/// ShiftAmt: Number of bits to right-shift a word-sized value loaded
911/// from AlignedAddr for it to have the same value as if
912/// ValueType was loaded from Addr.
913///
914/// Mask: Value to mask with the value loaded from AlignedAddr to
915/// include only the part that would've been loaded from Addr.
916///
917/// Inv_Mask: The inverse of Mask.
///
/// When the value is already at least MinWordSize bytes, no widening is
/// needed: the result uses Addr unchanged, a zero shift and an all-ones
/// mask, and Inv_Mask is left null.
918static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
920 Value *Addr, Align AddrAlign,
921 unsigned MinWordSize) {
922 PartwordMaskValues PMV;
923
924 Module *M = I->getModule();
925 LLVMContext &Ctx = M->getContext();
926 const DataLayout &DL = M->getDataLayout();
927 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
928
// Masking is done on integers: FP and vector values get a same-width
// integer stand-in type.
929 PMV.ValueType = PMV.IntValueType = ValueType;
930 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
931 PMV.IntValueType =
932 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
933
934 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
935 : ValueType;
936 if (PMV.ValueType == PMV.WordType) {
937 PMV.AlignedAddr = Addr;
938 PMV.AlignedAddrAlignment = AddrAlign;
939 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
940 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
941 return PMV;
942 }
943
944 PMV.AlignedAddrAlignment = Align(MinWordSize);
945
946 assert(ValueSize < MinWordSize);
947
948 PointerType *PtrTy = cast<PointerType>(Addr->getType());
949 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
950 Value *PtrLSB;
951
952 if (AddrAlign < MinWordSize) {
// Clear the low address bits with llvm.ptrmask to get the containing word,
// and keep those low bits separately as the byte offset within the word.
953 PMV.AlignedAddr = Builder.CreateIntrinsic(
954 Intrinsic::ptrmask, {PtrTy, IntTy},
955 {Addr, ConstantInt::getSigned(IntTy, ~(uint64_t)(MinWordSize - 1))},
956 nullptr, "AlignedAddr");
957
958 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
959 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
960 } else {
961 // If the alignment is high enough, the LSB are known 0.
962 PMV.AlignedAddr = Addr;
963 PtrLSB = ConstantInt::getNullValue(IntTy);
964 }
965
966 if (DL.isLittleEndian()) {
967 // turn bytes into bits
968 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
969 } else {
970 // turn bytes into bits, and count from the other side.
971 PMV.ShiftAmt = Builder.CreateShl(
972 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
973 }
974
975 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
// NOTE(review): `1 << (ValueSize * 8)` is evaluated at int width, so it is
// only well-defined for ValueSize <= 3. Confirm that no target passes a
// MinWordSize greater than 4 here.
976 PMV.Mask = Builder.CreateShl(
977 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
978 "Mask");
979
980 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
981
982 return PMV;
983}
984
985static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
986 const PartwordMaskValues &PMV) {
987 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
988 if (PMV.WordType == PMV.ValueType)
989 return WideWord;
990
991 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
992 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
993 return Builder.CreateBitCast(Trunc, PMV.ValueType);
994}
995
996static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
997 Value *Updated, const PartwordMaskValues &PMV) {
998 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
999 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
1000 if (PMV.WordType == PMV.ValueType)
1001 return Updated;
1002
1003 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
1004
1005 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
1006 Value *Shift =
1007 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
1008 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
1009 Value *Or = Builder.CreateOr(And, Shift, "inserted");
1010 return Or;
1011}
1012
1013/// Emit IR to implement a masked version of a given atomicrmw
1014/// operation. (That is, only the bits under the Mask should be
1015/// affected by the operation)
///
/// \p Loaded is the full word the operation is applied to, \p Shifted_Inc is
/// the operand already widened to the word type and shifted into the value's
/// position (the caller in this file only computes it for some operations and
/// passes nullptr otherwise), and \p Inc is the original narrow operand.
/// Returns the new full-word value; bits outside PMV.Mask are taken from
/// \p Loaded.
// NOTE(review): the listing numbering jumps from 1015 to 1017 here, so the
// first line of the signature (return type, function name and the `Op`
// parameter used by the switch below) is not visible in this view.
1017 IRBuilderBase &Builder, Value *Loaded,
1018 Value *Shifted_Inc, Value *Inc,
1019 const PartwordMaskValues &PMV) {
1020 // TODO: update to use
1021 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
1022 // to merge bits from two values without requiring PMV.Inv_Mask.
1023 switch (Op) {
1024 case AtomicRMWInst::Xchg: {
// Exchange: clear the value's slot in the loaded word and OR in the
// pre-shifted operand.
1025 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
1026 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
1027 return FinalVal;
1028 }
1029 case AtomicRMWInst::Or:
1030 case AtomicRMWInst::Xor:
1031 case AtomicRMWInst::And:
1032 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
1033 case AtomicRMWInst::Add:
1034 case AtomicRMWInst::Sub:
1035 case AtomicRMWInst::Nand: {
1036 // The other arithmetic ops need to be masked into place.
// The operation runs on the whole word against the shifted operand; only the
// bits under PMV.Mask of that result are kept, the rest come from Loaded.
1037 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
1038 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
1039 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
1040 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
1041 return FinalVal;
1042 }
1043 case AtomicRMWInst::Max:
1044 case AtomicRMWInst::Min:
// NOTE(review): the listing numbering jumps from 1044 to 1059 here; further
// case labels and the opening brace of this case body are not visible.
1059 // Finally, other ops will operate on the full value, so truncate down to
1060 // the original size, and expand out again after doing the
1061 // operation. Bitcasts will be inserted for FP values.
1062 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
1063 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
1064 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
1065 return FinalVal;
1066 }
1067 default:
1068 llvm_unreachable("Unknown atomic op");
1069 }
1070}
1071
1072/// Expand a sub-word atomicrmw operation into an appropriate
1073/// word-sized operation.
1074///
1075/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
1076/// way as a typical atomicrmw expansion. The only difference here is
1077/// that the operation inside of the loop may operate upon only a
1078/// part of the value.
///
/// \p ExpansionKind selects the loop form: CmpXChg uses insertRMWCmpXchgLoop,
/// anything else is asserted to be LLSC and uses insertRMWLLSCLoop. \p AI is
/// replaced by the extracted old value and erased.
1079void AtomicExpandImpl::expandPartwordAtomicRMW(
1080 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
1081 // Widen And/Or/Xor and give the target another chance at expanding it.
// NOTE(review): listing lines 1082-1084 are not visible here; presumably they
// define `Op` and open the conditional that this early return belongs to.
1085 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
1086 return;
1087 }
1088 AtomicOrdering MemOpOrder = AI->getOrdering();
1089 SyncScope::ID SSID = AI->getSyncScopeID();
1090
1091 ReplacementIRBuilder Builder(AI, *DL);
1092
// Compute the aligned word address plus the shift and masks that locate the
// narrow value inside that word.
1093 PartwordMaskValues PMV =
1094 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1095 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1096
1097 Value *ValOperand_Shifted = nullptr;
// NOTE(review): listing lines 1098-1099 are not visible here; the block below
// is guarded by a condition that cannot be read from this view, so
// ValOperand_Shifted stays nullptr when that condition is false.
1100 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
1101 ValOperand_Shifted =
1102 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
1103 "ValOperand_Shifted");
1104 }
1105
// Callback run inside the loop: given the currently loaded full word, produce
// the full word to store.
1106 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
1107 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
1108 AI->getValOperand(), PMV);
1109 };
1110
1111 Value *OldResult;
1112 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
1113 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1114 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
1115 AI->isVolatile(), PerformPartwordOp,
// NOTE(review): listing line 1116 (the remaining call arguments) is not
// visible here.
1117 } else {
1118 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
1119 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1120 PMV.AlignedAddrAlignment, MemOpOrder,
1121 PerformPartwordOp);
1122 }
1123
// The loop yields the old full word; hand users only the narrow part.
1124 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1125 AI->replaceAllUsesWith(FinalOldResult);
1126 AI->eraseFromParent();
1127}
1128
1129// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
1130AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
1131 ReplacementIRBuilder Builder(AI, *DL);
1133
1135 Op == AtomicRMWInst::And) &&
1136 "Unable to widen operation");
1137
1138 PartwordMaskValues PMV =
1139 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1140 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1141
1142 Value *ValOperand_Shifted =
1143 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
1144 PMV.ShiftAmt, "ValOperand_Shifted");
1145
1146 Value *NewOperand;
1147
1148 if (Op == AtomicRMWInst::And)
1149 NewOperand =
1150 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
1151 else
1152 NewOperand = ValOperand_Shifted;
1153
1154 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
1155 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1156 AI->getOrdering(), AI->getSyncScopeID());
1157
1158 NewAI->setVolatile(AI->isVolatile());
1159 copyMetadataForAtomic(*NewAI, *AI);
1160
1161 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
1162 AI->replaceAllUsesWith(FinalOldResult);
1163 AI->eraseFromParent();
1164 return NewAI;
1165}
1166
/// Expand a sub-word cmpxchg \p CI into a word-sized cmpxchg on the aligned
/// containing word. The surrounding bits of the word are loaded first and
/// merged into both the expected and the new value. For a strong cmpxchg a
/// retry loop is emitted; for a weak one the loop block branches straight to
/// the end block. \p CI is replaced by a { value, success } aggregate and
/// erased. Always returns true.
1167bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1168 // The basic idea here is that we're expanding a cmpxchg of a
1169 // smaller memory size up to a word-sized cmpxchg. To do this, we
1170 // need to add a retry-loop for strong cmpxchg, so that
1171 // modifications to other parts of the word don't cause a spurious
1172 // failure.
1173
1174 // This generates code like the following:
1175 // [[Setup mask values PMV.*]]
1176 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1177 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1178 // %InitLoaded = load i32* %addr
1179 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1180 // br partword.cmpxchg.loop
1181 // partword.cmpxchg.loop:
1182 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1183 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1184 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1185 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1186 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
1187 // i32 %FullWord_NewVal success_ordering failure_ordering
1188 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
1189 // %Success = extractvalue { i32, i1 } %NewCI, 1
1190 // br i1 %Success, label %partword.cmpxchg.end,
1191 // label %partword.cmpxchg.failure
1192 // partword.cmpxchg.failure:
1193 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1194 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1195 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1196 // label %partword.cmpxchg.end
1197 // partword.cmpxchg.end:
1198 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1199 // %FinalOldVal = trunc i32 %tmp1 to i8
1200 // %tmp2 = insertvalue { i8, i1 } poison, i8 %FinalOldVal, 0
1201 // %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
1202
1203 Value *Addr = CI->getPointerOperand();
1204 Value *Cmp = CI->getCompareOperand();
1205 Value *NewVal = CI->getNewValOperand();
1206
1207 BasicBlock *BB = CI->getParent();
1208 Function *F = BB->getParent();
1209 ReplacementIRBuilder Builder(CI, *DL);
1210 LLVMContext &Ctx = Builder.getContext();
1211
// Split at CI so that CI starts the end block, then create the failure and
// loop blocks in front of it (layout order: BB, loop, failure, end).
1212 BasicBlock *EndBB =
1213 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1214 auto FailureBB =
1215 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1216 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1217
1218 // The split call above "helpfully" added a branch at the end of BB
1219 // (to the wrong place).
1220 std::prev(BB->end())->eraseFromParent();
1221 Builder.SetInsertPoint(BB);
1222
1223 PartwordMaskValues PMV =
1224 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1225 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1226
1227 // Shift the incoming values over, into the right location in the word.
1228 Value *NewVal_Shifted =
1229 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1230 Value *Cmp_Shifted =
1231 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1232
1233 // Load the entire current word, and mask into place the expected and new
1234 // values
1235 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1236 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1237 Builder.CreateBr(LoopBB);
1238
1239 // partword.cmpxchg.loop:
1240 Builder.SetInsertPoint(LoopBB);
1241 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1242 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1243
1244 // The initial load must be atomic with the same synchronization scope
1245 // to avoid a data race with concurrent stores. If the instruction being
1246 // emulated is volatile, issue a volatile load.
1247 // addIncoming is done first so that any replaceAllUsesWith calls during
1248 // normalization correctly update the PHI incoming value.
1249 InitLoaded->setVolatile(CI->isVolatile());
// NOTE(review): listing line 1250 is not visible here; it presumably opens
// the conditional block that is closed after processAtomicInstr below.
1251 InitLoaded->setAtomic(AtomicOrdering::Monotonic, CI->getSyncScopeID());
1252 // The newly created load might need to be lowered further. Because it is
1253 // created in the same block as the atomicrmw, the AtomicExpand loop will
1254 // not process it again.
1255 processAtomicInstr(InitLoaded);
1256 }
1257
1258 // Mask/Or the expected and new values into place in the loaded word.
1259 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1260 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1261 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1262 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
// NOTE(review): listing line 1263 (the remaining call arguments) is not
// visible here.
1264 NewCI->setVolatile(CI->isVolatile());
1265 // When we're building a strong cmpxchg, we need a loop, so you
1266 // might think we could use a weak cmpxchg inside. But, using strong
1267 // allows the below comparison for ShouldContinue, and we're
1268 // expecting the underlying cmpxchg to be a machine instruction,
1269 // which is strong anyways.
1270 NewCI->setWeak(CI->isWeak());
1271
1272 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1273 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1274
// A weak cmpxchg does not retry: go to the end block regardless of Success.
1275 if (CI->isWeak())
1276 Builder.CreateBr(EndBB);
1277 else
1278 Builder.CreateCondBr(Success, EndBB, FailureBB);
1279
1280 // partword.cmpxchg.failure:
1281 Builder.SetInsertPoint(FailureBB);
1282 // Upon failure, verify that the masked-out part of the loaded value
1283 // has been modified. If it didn't, abort the cmpxchg, since the
1284 // masked-in part must've.
1285 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1286 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1287 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1288
1289 // Add the second value to the phi from above
1290 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1291
1292 // partword.cmpxchg.end:
1293 Builder.SetInsertPoint(CI);
1294
// Rebuild the { value, success } aggregate with the narrow old value.
1295 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1296 Value *Res = PoisonValue::get(CI->getType());
1297 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1298 Res = Builder.CreateInsertValue(Res, Success, 1);
1299
1300 CI->replaceAllUsesWith(Res);
1301 CI->eraseFromParent();
1302 return true;
1303}
1304
1305void AtomicExpandImpl::expandAtomicOpToLLSC(
1306 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1307 AtomicOrdering MemOpOrder,
1308 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1309 ReplacementIRBuilder Builder(I, *DL);
1310 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1311 MemOpOrder, PerformOp);
1312
1313 I->replaceAllUsesWith(Loaded);
1314 I->eraseFromParent();
1315}
1316
1317void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1318 ReplacementIRBuilder Builder(AI, *DL);
1319
1320 PartwordMaskValues PMV =
1321 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1322 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1323
1324 // The value operand must be sign-extended for signed min/max so that the
1325 // target's signed comparison instructions can be used. Otherwise, just
1326 // zero-ext.
1327 Instruction::CastOps CastOp = Instruction::ZExt;
1328 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1329 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1330 CastOp = Instruction::SExt;
1331
1332 Value *ValOperand_Shifted = Builder.CreateShl(
1333 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1334 PMV.ShiftAmt, "ValOperand_Shifted");
1335 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1336 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1337 AI->getOrdering());
1338 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1339 AI->replaceAllUsesWith(FinalOldResult);
1340 AI->eraseFromParent();
1341}
1342
/// Lower a sub-word cmpxchg \p CI through a target hook that takes the
/// aligned address, the shifted compare/new values and the mask. The success
/// bit is recomputed here by comparing the masked old word against the
/// shifted compare value. \p CI is replaced by the rebuilt { value, success }
/// aggregate and erased.
1343void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1344 AtomicCmpXchgInst *CI) {
1345 ReplacementIRBuilder Builder(CI, *DL);
1346
1347 PartwordMaskValues PMV = createMaskInstrs(
1348 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1349 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1350
// Zero-extend both operands to the word type and move them into the value's
// position within the word.
1351 Value *CmpVal_Shifted = Builder.CreateShl(
1352 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1353 "CmpVal_Shifted");
1354 Value *NewVal_Shifted = Builder.CreateShl(
1355 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1356 "NewVal_Shifted");
// NOTE(review): listing line 1357 is not visible here; it presumably declares
// `OldVal` and names the target hook whose arguments follow.
1358 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1359 CI->getMergedOrdering());
1360 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1361 Value *Res = PoisonValue::get(CI->getType());
1362 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
// Success means the bits under the mask in the old word equal the shifted
// compare value.
1363 Value *Success = Builder.CreateICmpEQ(
1364 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1365 Res = Builder.CreateInsertValue(Res, Success, 1);
1366
1367 CI->replaceAllUsesWith(Res);
1368 CI->eraseFromParent();
1369}
1370
/// Build a load-linked/store-conditional loop at the builder's current
/// insertion point. The current block is split there; the loop block loads
/// through TLI->emitLoadLinked, computes the new value with \p PerformOp,
/// stores through TLI->emitStoreConditional and repeats while the store
/// result is non-zero. On return the builder is positioned at the start of
/// the exit block.
///
/// \returns The value produced by the load-linked in the loop block.
1371Value *AtomicExpandImpl::insertRMWLLSCLoop(
1372 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1373 AtomicOrdering MemOpOrder,
1374 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1375 LLVMContext &Ctx = Builder.getContext();
1376 BasicBlock *BB = Builder.GetInsertBlock();
1377 Function *F = BB->getParent();
1378
1379 assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
1380 "Expected at least natural alignment at this point.");
1381
1382 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1383 //
1384 // The standard expansion we produce is:
1385 // [...]
1386 // atomicrmw.start:
1387 // %loaded = @load.linked(%addr)
1388 // %new = some_op iN %loaded, %incr
1389 // %stored = @store_conditional(%new, %addr)
1390 // %try_again = icmp i32 ne %stored, 0
1391 // br i1 %try_again, label %loop, label %atomicrmw.end
1392 // atomicrmw.end:
1393 // [...]
1394 BasicBlock *ExitBB =
1395 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1396 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1397
1398 // The split call above "helpfully" added a branch at the end of BB (to the
1399 // wrong place).
1400 std::prev(BB->end())->eraseFromParent();
1401 Builder.SetInsertPoint(BB);
1402 Builder.CreateBr(LoopBB);
1403
1404 // Start the main loop block now that we've taken care of the preliminaries.
1405 Builder.SetInsertPoint(LoopBB);
1406 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1407
1408 Value *NewVal = PerformOp(Builder, Loaded);
1409
// The store-conditional result is compared against an i32 zero; a non-zero
// result sends control back to the loop block.
1410 Value *StoreSuccess =
1411 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1412 Value *TryAgain = Builder.CreateICmpNE(
1413 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1414
1415 Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1416
1417 // Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is
1418 // hard to predict precise branch weights we mark the branch as "unknown"
1419 // (50/50) to prevent misleading optimizations.
// NOTE(review): listing line 1420 is not visible here; it presumably is the
// statement that applies the annotation described above to CondBr.
1421
1422 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1423 return Loaded;
1424}
1425
1426/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1427/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1428/// IR. As a migration step, we convert back to what use to be the standard
1429/// way to represent a pointer cmpxchg so that we can update backends one by
1430/// one.
1431AtomicCmpXchgInst *
1432AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1433 auto *M = CI->getModule();
1434 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1435 M->getDataLayout());
1436
1437 ReplacementIRBuilder Builder(CI, *DL);
1438
1439 Value *Addr = CI->getPointerOperand();
1440
1441 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1442 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1443
1444 auto *NewCI = Builder.CreateAtomicCmpXchg(
1445 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1446 CI->getFailureOrdering(), CI->getSyncScopeID());
1447 NewCI->setVolatile(CI->isVolatile());
1448 NewCI->setWeak(CI->isWeak());
1449 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1450
1451 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1452 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1453
1454 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1455
1456 Value *Res = PoisonValue::get(CI->getType());
1457 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1458 Res = Builder.CreateInsertValue(Res, Succ, 1);
1459
1460 CI->replaceAllUsesWith(Res);
1461 CI->eraseFromParent();
1462 return NewCI;
1463}
1464
/// Expand cmpxchg \p CI into explicit control flow built around
/// TLI->emitLoadLinked and TLI->emitStoreConditional, operating on the word
/// described by createMaskInstrs (so sub-word values are extracted from and
/// inserted into the containing word). Leading/trailing fences are emitted
/// through TLI when shouldInsertFencesForAtomic() says so. Users that extract
/// element 0 or 1 of \p CI are redirected to the loaded value and to a
/// CFG-derived success PHI; any remaining users get a rebuilt aggregate.
/// \p CI is erased. Always returns true.
1465bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1466 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1467 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1468 Value *Addr = CI->getPointerOperand();
1469 BasicBlock *BB = CI->getParent();
1470 Function *F = BB->getParent();
1471 LLVMContext &Ctx = F->getContext();
1472 // If shouldInsertFencesForAtomic() returns true, then the target does not
1473 // want to deal with memory orders, and emitLeading/TrailingFence should take
1474 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1475 // should preserve the ordering.
1476 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1477 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1478 ? AtomicOrdering::Monotonic
1479 : CI->getMergedOrdering();
1480
1481 // In implementations which use a barrier to achieve release semantics, we can
1482 // delay emitting this barrier until we know a store is actually going to be
1483 // attempted. The cost of this delay is that we need 2 copies of the block
1484 // emitting the load-linked, affecting code size.
1485 //
1486 // Ideally, this logic would be unconditional except for the minsize check
1487 // since in other cases the extra blocks naturally collapse down to the
1488 // minimal loop. Unfortunately, this puts too much stress on later
1489 // optimisations so we avoid emitting the extra logic in those cases too.
1490 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1491 SuccessOrder != AtomicOrdering::Monotonic &&
1492 SuccessOrder != AtomicOrdering::Acquire &&
1493 !F->hasMinSize();
1494
1495 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1496 // do it even on minsize.
1497 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1498
1499 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1500 //
1501 // The full expansion we produce is:
1502 // [...]
1503 // %aligned.addr = ...
1504 // cmpxchg.start:
1505 // %unreleasedload = @load.linked(%aligned.addr)
1506 // %unreleasedload.extract = extract value from %unreleasedload
1507 // %should_store = icmp eq %unreleasedload.extract, %desired
1508 // br i1 %should_store, label %cmpxchg.releasingstore,
1509 // label %cmpxchg.nostore
1510 // cmpxchg.releasingstore:
1511 // fence?
1512 // br label cmpxchg.trystore
1513 // cmpxchg.trystore:
1514 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1515 // [%releasedload, %cmpxchg.releasedload]
1516 // %updated.new = insert %new into %loaded.trystore
1517 // %stored = @store_conditional(%updated.new, %aligned.addr)
1518 // %success = icmp eq i32 %stored, 0
1519 // br i1 %success, label %cmpxchg.success,
1520 // label %cmpxchg.releasedload/%cmpxchg.failure
1521 // cmpxchg.releasedload:
1522 // %releasedload = @load.linked(%aligned.addr)
1523 // %releasedload.extract = extract value from %releasedload
1524 // %should_store = icmp eq %releasedload.extract, %desired
1525 // br i1 %should_store, label %cmpxchg.trystore,
1526 // label %cmpxchg.failure
1527 // cmpxchg.success:
1528 // fence?
1529 // br label %cmpxchg.end
1530 // cmpxchg.nostore:
1531 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1532 // [%releasedload,
1533 // %cmpxchg.releasedload/%cmpxchg.trystore]
1534 // @load_linked_fail_balance()?
1535 // br label %cmpxchg.failure
1536 // cmpxchg.failure:
1537 // fence?
1538 // br label %cmpxchg.end
1539 // cmpxchg.end:
1540 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1541 // [%loaded.trystore, %cmpxchg.trystore]
1542 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1543 // %loaded = extract value from %loaded.exit
1544 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1545 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1546 // [...]
// Blocks are created back to front, each inserted before the previous one, so
// the final layout is: start, fencedstore, trystore, releasedload, success,
// nostore, failure, end. The block the sketch above calls
// "cmpxchg.releasingstore" is named "cmpxchg.fencedstore" in the emitted IR.
1547 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1548 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1549 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1550 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1551 auto ReleasedLoadBB =
1552 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1553 auto TryStoreBB =
1554 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1555 auto ReleasingStoreBB =
1556 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1557 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1558
1559 ReplacementIRBuilder Builder(CI, *DL);
1560
1561 // The split call above "helpfully" added a branch at the end of BB (to the
1562 // wrong place), but we might want a fence too. It's easiest to just remove
1563 // the branch entirely.
1564 std::prev(BB->end())->eraseFromParent();
1565 Builder.SetInsertPoint(BB);
1566 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1567 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1568
1569 PartwordMaskValues PMV =
1570 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1571 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1572 Builder.CreateBr(StartBB);
1573
1574 // Start the main loop block now that we've taken care of the preliminaries.
1575 Builder.SetInsertPoint(StartBB);
1576 Value *UnreleasedLoad =
1577 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1578 Value *UnreleasedLoadExtract =
1579 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1580 Value *ShouldStore = Builder.CreateICmpEQ(
1581 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1582
1583 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1584 // jump straight past that fence instruction (if it exists).
1585 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
1586 MDBuilder(F->getContext()).createLikelyBranchWeights());
1587
1588 Builder.SetInsertPoint(ReleasingStoreBB);
1589 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1590 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1591 Builder.CreateBr(TryStoreBB);
1592
1593 Builder.SetInsertPoint(TryStoreBB);
1594 PHINode *LoadedTryStore =
1595 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1596 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1597 Value *NewValueInsert =
1598 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1599 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1600 PMV.AlignedAddr, MemOpOrder);
1601 StoreSuccess = Builder.CreateICmpEQ(
1602 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
// On a failed store a weak cmpxchg gives up (FailureBB); a strong one retries
// from the released-load block if it exists, otherwise from the start block.
1603 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1604 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1605 CI->isWeak() ? FailureBB : RetryBB,
1606 MDBuilder(F->getContext()).createLikelyBranchWeights());
1607
1608 Builder.SetInsertPoint(ReleasedLoadBB);
1609 Value *SecondLoad;
1610 if (HasReleasedLoadBB) {
1611 SecondLoad =
1612 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1613 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1614 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1615 CI->getCompareOperand(), "should_store");
1616
1617 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1618 // jump straight past that fence instruction (if it exists).
1619 Builder.CreateCondBr(
1620 ShouldStore, TryStoreBB, NoStoreBB,
1621 MDBuilder(F->getContext()).createLikelyBranchWeights());
1622 // Update PHI node in TryStoreBB.
1623 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1624 } else
// No branch targets ReleasedLoadBB in this configuration (RetryBB is StartBB),
// so the block only needs a terminator.
1625 Builder.CreateUnreachable();
1626
1627 // Make sure later instructions don't get reordered with a fence if
1628 // necessary.
1629 Builder.SetInsertPoint(SuccessBB);
1630 if (ShouldInsertFencesForAtomic ||
// NOTE(review): listing line 1631 (the second operand of this condition) is
// not visible here.
1632 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1633 Builder.CreateBr(ExitBB);
1634
1635 Builder.SetInsertPoint(NoStoreBB);
1636 PHINode *LoadedNoStore =
1637 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1638 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1639 if (HasReleasedLoadBB)
1640 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1641
1642 // In the failing case, where we don't execute the store-conditional, the
1643 // target might want to balance out the load-linked with a dedicated
1644 // instruction (e.g., on ARM, clearing the exclusive monitor).
// NOTE(review): listing line 1645 is not visible here; presumably it is the
// target hook call the comment above refers to.
1646 Builder.CreateBr(FailureBB);
1647
1648 Builder.SetInsertPoint(FailureBB);
1649 PHINode *LoadedFailure =
1650 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1651 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
// Only a weak cmpxchg branches from TryStoreBB to FailureBB (see the
// conditional branch emitted in TryStoreBB).
1652 if (CI->isWeak())
1653 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1654 if (ShouldInsertFencesForAtomic)
1655 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1656 Builder.CreateBr(ExitBB);
1657
1658 // Finally, we have control-flow based knowledge of whether the cmpxchg
1659 // succeeded or not. We expose this to later passes by converting any
1660 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1661 // PHI.
1662 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1663 PHINode *LoadedExit =
1664 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1665 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1666 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1667 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1668 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1669 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1670
1671 // This is the "exit value" from the cmpxchg expansion. It may be of
1672 // a type wider than the one in the cmpxchg instruction.
1673 Value *LoadedFull = LoadedExit;
1674
// Emit the extraction right after the Success PHI so it follows all PHIs in
// the exit block.
1675 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1676 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1677
1678 // Look for any users of the cmpxchg that are just comparing the loaded value
1679 // against the desired one, and replace them with the CFG-derived version.
// NOTE(review): listing line 1680 is not visible here; presumably it declares
// the `PrunedInsts` container used below.
1681 for (auto *User : CI->users()) {
1682 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1683 if (!EV)
1684 continue;
1685
1686 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1687 "weird extraction from { iN, i1 }");
1688
1689 if (EV->getIndices()[0] == 0)
1690 EV->replaceAllUsesWith(Loaded);
1691 else
// NOTE(review): listing line 1692 (the else-branch statement) is not visible
// here; presumably it redirects the index-1 extract to the Success PHI.
1693
1694 PrunedInsts.push_back(EV);
1695 }
1696
1697 // We can remove the instructions now we're no longer iterating through them.
1698 for (auto *EV : PrunedInsts)
1699 EV->eraseFromParent();
1700
1701 if (!CI->use_empty()) {
1702 // Some use of the full struct return that we don't understand has happened,
1703 // so we've got to reconstruct it properly.
1704 Value *Res;
1705 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1706 Res = Builder.CreateInsertValue(Res, Success, 1);
1707
1708 CI->replaceAllUsesWith(Res);
1709 }
1710
1711 CI->eraseFromParent();
1712 return true;
1713}
1714
1715bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1716 if (RMWI->isVolatile())
1717 return false;
1718 // TODO: Add floating point support.
1719 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1720 if (!C)
1721 return false;
1722
1723 switch (RMWI->getOperation()) {
1724 case AtomicRMWInst::Add:
1725 case AtomicRMWInst::Sub:
1726 case AtomicRMWInst::Or:
1727 case AtomicRMWInst::Xor:
1728 return C->isZero();
1729 case AtomicRMWInst::And:
1730 return C->isMinusOne();
1731 case AtomicRMWInst::Min:
1732 return C->isMaxValue(true);
1733 case AtomicRMWInst::Max:
1734 return C->isMinValue(true);
1736 return C->isMaxValue(false);
1738 return C->isMinValue(false);
1739 default:
1740 return false;
1741 }
1742}
1743
1744bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1745 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1746 tryExpandAtomicLoad(ResultingLoad);
1747 return true;
1748 }
1749 return false;
1750}
1751
// Emits a compare-exchange retry loop at Builder's current insertion point.
//
// The block containing the insertion point is split: an initial load of
// ResultTy from Addr is emitted at the end of the original block, a new
// "atomicrmw.start" block holds the PHI, the PerformOp computation and the
// cmpxchg produced by the CreateCmpXchg callback, and the remainder of the
// original block becomes "atomicrmw.end".
//
// Returns the NewLoaded value handed back by CreateCmpXchg. On return the
// builder is positioned at the beginning of "atomicrmw.end".
1752Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1753 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1754 AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile,
1755 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1756 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1757 LLVMContext &Ctx = Builder.getContext();
1758 BasicBlock *BB = Builder.GetInsertBlock();
1759 Function *F = BB->getParent();
1760
1761 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1762 //
1763 // The standard expansion we produce is:
1764 // [...]
1765 // %init_loaded = load atomic iN* %addr
1766 // br label %loop
1767 // loop:
1768 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1769 // %new = some_op iN %loaded, %incr
1770 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1771 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1772 // %success = extractvalue { iN, i1 } %pair, 1
1773 // br i1 %success, label %atomicrmw.end, label %loop
1774 // atomicrmw.end:
1775 // [...]
1776 BasicBlock *ExitBB =
1777 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1778 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1779
1780 // The split call above "helpfully" added a branch at the end of BB (to the
1781 // wrong place), but we want a load. It's easiest to just remove
1782 // the branch entirely.
1783 std::prev(BB->end())->eraseFromParent();
1784 Builder.SetInsertPoint(BB);
1785 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1786 Builder.CreateBr(LoopBB);
1787
1788 // Start the main loop block now that we've taken care of the preliminaries.
1789 Builder.SetInsertPoint(LoopBB);
1790 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1791 Loaded->addIncoming(InitLoaded, BB);
1792
1793 // The initial load must be atomic with the same synchronization scope
1794 // to avoid a data race with concurrent stores. If the instruction being
1795 // emulated is volatile, issue a volatile load.
1796 // addIncoming is done first so that any replaceAllUsesWith calls during
1797 // normalization correctly update the PHI incoming value.
1798 InitLoaded->setVolatile(IsVolatile);
// NOTE(review): source line 1799 is absent from this extract. It presumably
// opens the conditional block that the closing brace on line 1805 terminates;
// restore it from upstream before building.
1800 InitLoaded->setAtomic(AtomicOrdering::Monotonic, SSID);
1801 // The newly created load might need to be lowered further. Because it is
1802 // created in the same block as the atomicrmw, the AtomicExpand loop will
1803 // not process it again.
1804 processAtomicInstr(InitLoaded);
1805 }
1806
// Compute the value to store from the value currently believed to be in
// memory.
1807 Value *NewVal = PerformOp(Builder, Loaded);
1808
// Out-parameters filled in by the CreateCmpXchg callback below.
1809 Value *NewLoaded = nullptr;
1810 Value *Success = nullptr;
1811
// An Unordered request is strengthened to Monotonic before being handed to
// the callback; every other ordering is passed through unchanged.
1812 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1813 MemOpOrder == AtomicOrdering::Unordered
1814 ? AtomicOrdering::Monotonic
1815 : MemOpOrder,
1816 SSID, IsVolatile, Success, NewLoaded, MetadataSrc);
1817 assert(Success && NewLoaded);
1818
// On a failed exchange the loop retries with the value the cmpxchg observed.
1819 Loaded->addIncoming(NewLoaded, LoopBB);
1820
1821 Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);
1822
1823 // Atomic RMW expands to a cmpxchg loop. Since precise branch weights
1824 // cannot be easily determined here, we mark the branch as "unknown" (50/50)
1825 // to prevent misleading optimizations.
// NOTE(review): source line 1826 is absent from this extract. CondBr is not
// otherwise used in the visible code, so the dropped statement presumably
// applies the branch-weight marking described above; confirm against
// upstream.
1827
1828 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1829 return NewLoaded;
1830}
1831
1832bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1833 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1834 unsigned ValueSize = getAtomicOpSize(CI);
1835
1836 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1837 default:
1838 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1839 case TargetLoweringBase::AtomicExpansionKind::None:
1840 if (ValueSize < MinCASSize)
1841 return expandPartwordCmpXchg(CI);
1842 return false;
1843 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1844 return expandAtomicCmpXchg(CI);
1845 }
1846 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1847 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1848 return true;
1849 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1850 return lowerAtomicCmpXchgInst(CI);
1851 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1852 TLI->emitExpandAtomicCmpXchg(CI);
1853 return true;
1854 }
1855 }
1856}
1857
1858bool AtomicExpandImpl::expandAtomicRMWToCmpXchg(
1859 AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) {
1860 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1861 Builder.setIsFPConstrained(
1862 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1863
1864 // FIXME: If FP exceptions are observable, we should force them off for the
1865 // loop for the FP atomics.
1866 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1867 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1868 AI->getOrdering(), AI->getSyncScopeID(), AI->isVolatile(),
1869 [&](IRBuilderBase &Builder, Value *Loaded) {
1870 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1871 AI->getValOperand());
1872 },
1873 CreateCmpXchg, /*MetadataSrc=*/AI);
1874
1875 AI->replaceAllUsesWith(Loaded);
1876 AI->eraseFromParent();
1877 return true;
1878}
1879
1880// In order to use one of the sized library calls such as
1881// __atomic_fetch_add_4, the alignment must be sufficient, the size
1882// must be one of the potentially-specialized sizes, and the value
1883// type must actually exist in C on the target (otherwise, the
1884// function wouldn't actually be defined.)
1885static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1886 const DataLayout &DL) {
1887 // TODO: "LargestSize" is an approximation for "largest type that
1888 // you can express in C". It seems to be the case that int128 is
1889 // supported on all 64-bit platforms, otherwise only up to 64-bit
1890 // integers are supported. If we get this wrong, then we'll try to
1891 // call a sized libcall that doesn't actually exist. There should
1892 // really be some more reliable way in LLVM of determining integer
1893 // sizes which are valid in the target's C ABI...
1894 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1895 return Alignment >= Size &&
1896 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1897 Size <= LargestSize;
1898}
1899
1900void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1901 static const RTLIB::Libcall Libcalls[6] = {
1902 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1903 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1904 unsigned Size = getAtomicOpSize(I);
1905
1906 bool Expanded = expandAtomicOpToLibcall(
1907 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1908 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1909 if (!Expanded)
1910 handleUnsupportedAtomicSize(I, "atomic load");
1911}
1912
1913void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1914 static const RTLIB::Libcall Libcalls[6] = {
1915 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1916 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1917 unsigned Size = getAtomicOpSize(I);
1918
1919 bool Expanded = expandAtomicOpToLibcall(
1920 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1921 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1922 if (!Expanded)
1923 handleUnsupportedAtomicSize(I, "atomic store");
1924}
1925
1926void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
1927 const Twine &AtomicOpName,
1928 Instruction *DiagnosticInst) {
1929 static const RTLIB::Libcall Libcalls[6] = {
1930 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1931 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1932 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1933 unsigned Size = getAtomicOpSize(I);
1934
1935 bool Expanded = expandAtomicOpToLibcall(
1936 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1937 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1938 Libcalls);
1939 if (!Expanded)
1940 handleUnsupportedAtomicSize(I, AtomicOpName, DiagnosticInst);
1941}
1942
1944 static const RTLIB::Libcall LibcallsXchg[6] = {
1945 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1946 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1947 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1948 static const RTLIB::Libcall LibcallsAdd[6] = {
1949 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1950 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1951 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1952 static const RTLIB::Libcall LibcallsSub[6] = {
1953 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1954 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1955 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1956 static const RTLIB::Libcall LibcallsAnd[6] = {
1957 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1958 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1959 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1960 static const RTLIB::Libcall LibcallsOr[6] = {
1961 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1962 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1963 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1964 static const RTLIB::Libcall LibcallsXor[6] = {
1965 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1966 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1967 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1968 static const RTLIB::Libcall LibcallsNand[6] = {
1969 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1970 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1971 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1972
1973 switch (Op) {
1975 llvm_unreachable("Should not have BAD_BINOP.");
1977 return ArrayRef(LibcallsXchg);
1978 case AtomicRMWInst::Add:
1979 return ArrayRef(LibcallsAdd);
1980 case AtomicRMWInst::Sub:
1981 return ArrayRef(LibcallsSub);
1982 case AtomicRMWInst::And:
1983 return ArrayRef(LibcallsAnd);
1984 case AtomicRMWInst::Or:
1985 return ArrayRef(LibcallsOr);
1986 case AtomicRMWInst::Xor:
1987 return ArrayRef(LibcallsXor);
1989 return ArrayRef(LibcallsNand);
1990 case AtomicRMWInst::Max:
1991 case AtomicRMWInst::Min:
2006 // No atomic libcalls are available for these.
2007 return {};
2008 }
2009 llvm_unreachable("Unexpected AtomicRMW operation.");
2010}
2011
2012void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
2013 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
2014
2015 unsigned Size = getAtomicOpSize(I);
2016
2017 bool Success = false;
2018 if (!Libcalls.empty())
2019 Success = expandAtomicOpToLibcall(
2020 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
2021 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
2022
2023 // The expansion failed: either there were no libcalls at all for
2024 // the operation (min/max), or there were only size-specialized
2025 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
2026 // CAS libcall, via a CAS loop, instead.
2027 if (!Success) {
2028 expandAtomicRMWToCmpXchg(
2029 I, [this, I](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
2030 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
2031 SyncScope::ID SSID, bool IsVolatile, Value *&Success,
2032 Value *&NewLoaded, Instruction *MetadataSrc) {
2033 // Create the CAS instruction normally...
2034 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
2035 Addr, Loaded, NewVal, Alignment, MemOpOrder,
2037 Pair->setVolatile(IsVolatile);
2038 if (MetadataSrc)
2039 copyMetadataForAtomic(*Pair, *MetadataSrc);
2040
2041 Success = Builder.CreateExtractValue(Pair, 1, "success");
2042 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
2043
2044 // ...and then expand the CAS into a libcall.
2045 expandAtomicCASToLibcall(
2046 Pair,
2047 "atomicrmw " + AtomicRMWInst::getOperationName(I->getOperation()),
2048 MetadataSrc);
2049 });
2050 }
2051}
2052
2053// A helper routine for the above expandAtomic*ToLibcall functions.
2054//
2055// 'Libcalls' contains an array of enum values for the particular
2056// ATOMIC libcalls to be emitted. All of the other arguments besides
2057// 'I' are extracted from the Instruction subclass by the
2058// caller. Depending on the particular call, some will be null.
2059bool AtomicExpandImpl::expandAtomicOpToLibcall(
2060 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
2061 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
2062 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
2063 assert(Libcalls.size() == 6);
2064
2065 LLVMContext &Ctx = I->getContext();
2066 Module *M = I->getModule();
2067 const DataLayout &DL = M->getDataLayout();
2068 IRBuilder<> Builder(I);
2069 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
2070
2071 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
2072 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
2073
2074 if (M->getTargetTriple().isOSWindows() && M->getTargetTriple().isX86_64() &&
2075 Size == 16) {
2076 // x86_64 Windows passes i128 as an XMM vector; on return, it is in
2077 // XMM0, and as a parameter, it is passed indirectly. The generic lowering
2078 // rules handles this correctly if we pass it as a v2i64 rather than
2079 // i128. This is what Clang does in the frontend for such types as well
2080 // (see WinX86_64ABIInfo::classify in Clang).
2081 SizedIntTy = FixedVectorType::get(Type::getInt64Ty(Ctx), 2);
2082 }
2083
2084 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
2085
2086 // TODO: the "order" argument type is "int", not int32. So
2087 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
2088 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
2089 Constant *OrderingVal =
2090 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
2091 Constant *Ordering2Val = nullptr;
2092 if (CASExpected) {
2093 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
2094 Ordering2Val =
2095 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
2096 }
2097 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
2098
2099 RTLIB::Libcall RTLibType;
2100 if (UseSizedLibcall) {
2101 switch (Size) {
2102 case 1:
2103 RTLibType = Libcalls[1];
2104 break;
2105 case 2:
2106 RTLibType = Libcalls[2];
2107 break;
2108 case 4:
2109 RTLibType = Libcalls[3];
2110 break;
2111 case 8:
2112 RTLibType = Libcalls[4];
2113 break;
2114 case 16:
2115 RTLibType = Libcalls[5];
2116 break;
2117 }
2118 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
2119 RTLibType = Libcalls[0];
2120 } else {
2121 // Can't use sized function, and there's no generic for this
2122 // operation, so give up.
2123 return false;
2124 }
2125
2126 RTLIB::LibcallImpl LibcallImpl = LibcallLowering->getLibcallImpl(RTLibType);
2127 if (LibcallImpl == RTLIB::Unsupported) {
2128 // This target does not implement the requested atomic libcall so give up.
2129 return false;
2130 }
2131
2132 // Build up the function call. There's two kinds. First, the sized
2133 // variants. These calls are going to be one of the following (with
2134 // N=1,2,4,8,16):
2135 // iN __atomic_load_N(iN *ptr, int ordering)
2136 // void __atomic_store_N(iN *ptr, iN val, int ordering)
2137 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
2138 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
2139 // int success_order, int failure_order)
2140 //
2141 // Note that these functions can be used for non-integer atomic
2142 // operations, the values just need to be bitcast to integers on the
2143 // way in and out.
2144 //
2145 // And, then, the generic variants. They look like the following:
2146 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
2147 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
2148 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
2149 // int ordering)
2150 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
2151 // void *desired, int success_order,
2152 // int failure_order)
2153 //
2154 // The different signatures are built up depending on the
2155 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
2156 // variables.
2157
2158 AllocaInst *AllocaCASExpected = nullptr;
2159 AllocaInst *AllocaValue = nullptr;
2160 AllocaInst *AllocaResult = nullptr;
2161
2162 Type *ResultTy;
2164 AttributeList Attr;
2165
2166 // 'size' argument.
2167 if (!UseSizedLibcall) {
2168 // Note, getIntPtrType is assumed equivalent to size_t.
2169 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
2170 }
2171
2172 // 'ptr' argument.
2173 // note: This assumes all address spaces share a common libfunc
2174 // implementation and that addresses are convertable. For systems without
2175 // that property, we'd need to extend this mechanism to support AS-specific
2176 // families of atomic intrinsics.
2177 Value *PtrVal = PointerOperand;
2178 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
2179 Args.push_back(PtrVal);
2180
2181 // 'expected' argument, if present.
2182 if (CASExpected) {
2183 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
2184 AllocaCASExpected->setAlignment(AllocaAlignment);
2185 Builder.CreateLifetimeStart(AllocaCASExpected);
2186 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
2187 Args.push_back(AllocaCASExpected);
2188 }
2189
2190 // 'val' argument ('desired' for cas), if present.
2191 if (ValueOperand) {
2192 if (UseSizedLibcall) {
2193 Value *IntValue =
2194 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
2195 Args.push_back(IntValue);
2196 } else {
2197 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
2198 AllocaValue->setAlignment(AllocaAlignment);
2199 Builder.CreateLifetimeStart(AllocaValue);
2200 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
2201 Args.push_back(AllocaValue);
2202 }
2203 }
2204
2205 // 'ret' argument.
2206 if (!CASExpected && HasResult && !UseSizedLibcall) {
2207 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
2208 AllocaResult->setAlignment(AllocaAlignment);
2209 Builder.CreateLifetimeStart(AllocaResult);
2210 Args.push_back(AllocaResult);
2211 }
2212
2213 // 'ordering' ('success_order' for cas) argument.
2214 Args.push_back(OrderingVal);
2215
2216 // 'failure_order' argument, if present.
2217 if (Ordering2Val)
2218 Args.push_back(Ordering2Val);
2219
2220 // Now, the return type.
2221 if (CASExpected) {
2222 ResultTy = Type::getInt1Ty(Ctx);
2223 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2224 } else if (HasResult && UseSizedLibcall)
2225 ResultTy = SizedIntTy;
2226 else
2227 ResultTy = Type::getVoidTy(Ctx);
2228
2229 // Done with setting up arguments and return types, create the call:
2231 for (Value *Arg : Args)
2232 ArgTys.push_back(Arg->getType());
2233 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2234 FunctionCallee LibcallFn = M->getOrInsertFunction(
2236 Attr);
2237 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2238 Call->setAttributes(Attr);
2239 Value *Result = Call;
2240
2241 // And then, extract the results...
2242 if (ValueOperand && !UseSizedLibcall)
2243 Builder.CreateLifetimeEnd(AllocaValue);
2244
2245 if (CASExpected) {
2246 // The final result from the CAS is {load of 'expected' alloca, bool result
2247 // from call}
2248 Type *FinalResultTy = I->getType();
2249 Value *V = PoisonValue::get(FinalResultTy);
2250 Value *ExpectedOut = Builder.CreateAlignedLoad(
2251 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2252 Builder.CreateLifetimeEnd(AllocaCASExpected);
2253 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2254 V = Builder.CreateInsertValue(V, Result, 1);
2256 } else if (HasResult) {
2257 Value *V;
2258 if (UseSizedLibcall) {
2259 // Add bitcasts from Result's scalar type to I's <n x ptr> vector type
2260 auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
2261 auto *VTy = dyn_cast<VectorType>(I->getType());
2262 if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
2263 unsigned AS = PtrTy->getAddressSpace();
2264 Value *BC = Builder.CreateBitCast(
2265 Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
2266 V = Builder.CreateIntToPtr(BC, I->getType());
2267 } else
2268 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2269 } else {
2270 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2271 AllocaAlignment);
2272 Builder.CreateLifetimeEnd(AllocaResult);
2273 }
2274 I->replaceAllUsesWith(V);
2275 }
2276 I->eraseFromParent();
2277 return true;
2278}
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile, Value *&Success, Value *&NewLoaded, Instruction *MetadataSrc)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
Expand Atomic static false unsigned getAtomicOpSize(LoadInst *LI)
static void writeUnsupportedAtomicSizeReason(const TargetLowering *TLI, Inst *I, raw_ostream &OS)
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void copyMetadataForAtomic(Instruction &Dest, const Instruction &Source)
Copy metadata that's safe to preserve when widening atomics.
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
static bool isIdempotentRMW(AtomicRMWInst &RMWI)
Return true if and only if the given instruction does not modify the memory location referenced.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
#define T
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
This file defines the SmallString class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
void setAlignment(Align Align)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
void setWeak(bool IsWeak)
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ FSub
*p = old - v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static LLVM_ABI StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
reverse_iterator rbegin()
Definition BasicBlock.h:477
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
reverse_iterator rend()
Definition BasicBlock.h:479
void setAttributes(AttributeList A)
Set the attributes for this call.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ArrayRef< unsigned > getIndices() const
unsigned getNumIndices() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:869
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
BasicBlockListType::iterator iterator
Definition Function.h:70
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:1969
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2681
LLVM_ABI CallInst * CreateLifetimeStart(Value *Ptr)
Create a lifetime.start intrinsic.
LLVM_ABI CallInst * CreateLifetimeEnd(Value *Ptr)
Create a lifetime.end intrinsic.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1935
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1238
UnreachableInst * CreateUnreachable()
Definition IRBuilder.h:1380
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2674
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2237
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition IRBuilder.h:2276
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2378
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1232
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2324
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2539
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2374
void setIsFPConstrained(bool IsCon)
Enable/Disable use of constrained floating point math.
Definition IRBuilder.h:358
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2242
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1918
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1533
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2120
LLVMContext & getContext() const
Definition IRBuilder.h:203
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1592
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2232
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2553
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition IRBuilder.h:1954
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1614
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2247
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, bool Elementwise=false)
Definition IRBuilder.h:1982
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
LLVM_ABI void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
Tracks which library functions to use for a particular subtarget.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
Record a mapping from subtarget to LibcallLoweringInfo.
const LibcallLoweringInfo & getLibcallLowering(const TargetSubtargetInfo &Subtarget) const
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Metadata node.
Definition Metadata.h:1080
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:68
LLVMContext & getContext() const
Get the global data context.
Definition Module.h:288
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
virtual Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const
Perform a store-conditional operation to Addr.
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform a bit test atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
virtual bool shouldInsertFencesForAtomic(const Instruction *I) const
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
virtual AtomicOrdering atomicOperationOrderAfterFenceSplit(const Instruction *I) const
virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const
Perform a cmpxchg expansion using a target-specific method.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const
Perform a masked atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const
Perform an atomicrmw expansion in a target-specific way.
virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const
virtual void emitExpandAtomicStore(StoreInst *SI) const
Perform an atomic store in a target-specific way.
virtual AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const
Returns how the given atomicrmw should be cast by the IR-level AtomicExpand pass.
virtual bool shouldInsertTrailingSeqCstFenceForAtomicStore(const Instruction *I) const
Whether AtomicExpandPass should automatically insert a seq_cst trailing fence without reducing the or...
virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const
Perform a masked cmpxchg using a target-specific intrinsic.
virtual bool shouldIssueAtomicLoadForAtomicEmulationLoop(void) const
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual void emitExpandAtomicLoad(LoadInst *LI) const
Perform an atomic load in a target-specific way.
virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass.
virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform an atomicrmw whose result is only used by comparison, using a target-specific intrinsic.
virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
Inserts in the IR a target-specific intrinsic specifying a fence.
virtual LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const
On some platforms, an AtomicRMW that never actually modifies the value (such as fetch_add of 0) can b...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:552
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool canInstructionHaveMMRAs(const Instruction &I)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
AtomicOrdering
Atomic ordering for LLVM's memory model.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
LLVM_ABI bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls,...
LLVM_ABI char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:435
Matching combinators.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.