LLVM 23.0.0git
AtomicExpandPass.cpp
Go to the documentation of this file.
1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target-specific instructions which implement
// the same semantics in a way which better fits the target backend. This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/Instruction.h"
38#include "llvm/IR/MDBuilder.h"
40#include "llvm/IR/Module.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
46#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const LibcallLoweringInfo *LibcallLowering = nullptr;
67 const DataLayout *DL = nullptr;
68
69private:
70 /// Callback type for emitting a cmpxchg instruction during RMW expansion.
71 /// Parameters: (Builder, Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
72 /// SSID, IsVolatile, /* OUT */ Success, /* OUT */ NewLoaded,
73 /// MetadataSrc)
74 using CreateCmpXchgInstFun = function_ref<void(
76 SyncScope::ID, bool, Value *&, Value *&, Instruction *)>;
77
78 void handleFailure(Instruction &FailedInst, const Twine &Msg,
79 Instruction *DiagnosticInst = nullptr) const {
80 LLVMContext &Ctx = FailedInst.getContext();
81
82 // TODO: Do not use generic error type.
83 Ctx.emitError(DiagnosticInst ? DiagnosticInst : &FailedInst, Msg);
84
85 if (!FailedInst.getType()->isVoidTy())
86 FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
87 FailedInst.eraseFromParent();
88 }
89
90 template <typename Inst>
91 void handleUnsupportedAtomicSize(Inst *I, const Twine &AtomicOpName,
92 Instruction *DiagnosticInst = nullptr) const;
93
94 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
95 bool tryInsertTrailingSeqCstFence(Instruction *AtomicI);
96 template <typename AtomicInst>
97 bool tryInsertFencesForAtomic(AtomicInst *AtomicI, bool OrderingRequiresFence,
98 AtomicOrdering NewOrdering);
99 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
100 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
101 bool tryExpandAtomicLoad(LoadInst *LI);
102 bool expandAtomicLoadToLL(LoadInst *LI);
103 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
104 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
105 bool tryExpandAtomicStore(StoreInst *SI);
106 void expandAtomicStoreToXChg(StoreInst *SI);
107 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
108 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
109 Value *
110 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
111 Align AddrAlign, AtomicOrdering MemOpOrder,
112 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
113 void expandAtomicOpToLLSC(
114 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
115 AtomicOrdering MemOpOrder,
116 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
117 void expandPartwordAtomicRMW(
119 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
120 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
121 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
122 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
123
124 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
125 Value *insertRMWCmpXchgLoop(
126 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
127 AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile,
128 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
129 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
130 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
131
132 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
133 bool isIdempotentRMW(AtomicRMWInst *RMWI);
134 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
135
136 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
137 Value *PointerOperand, Value *ValueOperand,
138 Value *CASExpected, AtomicOrdering Ordering,
139 AtomicOrdering Ordering2,
140 ArrayRef<RTLIB::Libcall> Libcalls);
141 void expandAtomicLoadToLibcall(LoadInst *LI);
142 void expandAtomicStoreToLibcall(StoreInst *LI);
143 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
144 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
145 const Twine &AtomicOpName = "cmpxchg",
146 Instruction *DiagnosticInst = nullptr);
147
148 bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
149 CreateCmpXchgInstFun CreateCmpXchg);
150
151 bool processAtomicInstr(Instruction *I);
152
153public:
154 bool run(Function &F,
155 const LibcallLoweringModuleAnalysisResult &LibcallResult,
156 const TargetMachine *TM);
157};
158
159class AtomicExpandLegacy : public FunctionPass {
160public:
161 static char ID; // Pass identification, replacement for typeid
162
163 AtomicExpandLegacy() : FunctionPass(ID) {}
164
165 void getAnalysisUsage(AnalysisUsage &AU) const override {
168 }
169
170 bool runOnFunction(Function &F) override;
171};
172
173// IRBuilder to be used for replacement atomic instructions.
174struct ReplacementIRBuilder
175 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
176 MDNode *MMRAMD = nullptr;
177
178 // Preserves the DebugLoc from I, and preserves still valid metadata.
179 // Enable StrictFP builder mode when appropriate.
180 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
181 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
183 [this](Instruction *I) { addMMRAMD(I); })) {
184 SetInsertPoint(I);
185 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
186 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
187 this->setIsFPConstrained(true);
188
189 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
190 }
191
192 void addMMRAMD(Instruction *I) {
194 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
195 }
196};
197
198} // end anonymous namespace
199
200char AtomicExpandLegacy::ID = 0;
201
202char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
203
205 "Expand Atomic instructions", false, false)
208INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
209 "Expand Atomic instructions", false, false)
210
211// Helper functions to retrieve the size of atomic instructions.
212static unsigned getAtomicOpSize(LoadInst *LI) {
213 const DataLayout &DL = LI->getDataLayout();
214 return DL.getTypeStoreSize(LI->getType());
215}
216
217static unsigned getAtomicOpSize(StoreInst *SI) {
218 const DataLayout &DL = SI->getDataLayout();
219 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
220}
221
222static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
223 const DataLayout &DL = RMWI->getDataLayout();
224 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
225}
226
227static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
228 const DataLayout &DL = CASI->getDataLayout();
229 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
230}
231
232/// Copy metadata that's safe to preserve when widening atomics.
234 const Instruction &Source) {
236 Source.getAllMetadata(MD);
237 LLVMContext &Ctx = Dest.getContext();
238 MDBuilder MDB(Ctx);
239
240 for (auto [ID, N] : MD) {
241 switch (ID) {
242 case LLVMContext::MD_dbg:
243 case LLVMContext::MD_tbaa:
244 case LLVMContext::MD_tbaa_struct:
245 case LLVMContext::MD_alias_scope:
246 case LLVMContext::MD_noalias:
247 case LLVMContext::MD_noalias_addrspace:
248 case LLVMContext::MD_access_group:
249 case LLVMContext::MD_mmra:
250 Dest.setMetadata(ID, N);
251 break;
252 default:
253 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
254 Dest.setMetadata(ID, N);
255 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
256 Dest.setMetadata(ID, N);
257
258 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
259 // uses.
260 break;
261 }
262 }
263}
264
265template <typename Inst>
266static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
267 unsigned Size = getAtomicOpSize(I);
268 Align Alignment = I->getAlign();
269 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
270 return Alignment >= Size && Size <= MaxSize;
271}
272
273template <typename Inst>
275 raw_ostream &OS) {
276 unsigned Size = getAtomicOpSize(I);
277 Align Alignment = I->getAlign();
278 bool NeedSeparator = false;
279
280 if (Alignment < Size) {
281 OS << "instruction alignment " << Alignment.value()
282 << " is smaller than the required " << Size
283 << "-byte alignment for this atomic operation";
284 NeedSeparator = true;
285 }
286
287 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
288 if (Size > MaxSize) {
289 if (NeedSeparator)
290 OS << "; ";
291 OS << "target supports atomics up to " << MaxSize
292 << " bytes, but this atomic accesses " << Size << " bytes";
293 }
294}
295
296template <typename Inst>
297void AtomicExpandImpl::handleUnsupportedAtomicSize(
298 Inst *I, const Twine &AtomicOpName, Instruction *DiagnosticInst) const {
299 assert(!atomicSizeSupported(TLI, I) && "expected unsupported atomic size");
300 SmallString<128> FailureReason;
301 raw_svector_ostream OS(FailureReason);
303 handleFailure(*I, Twine("unsupported ") + AtomicOpName + ": " + FailureReason,
304 DiagnosticInst);
305}
306
307bool AtomicExpandImpl::tryInsertTrailingSeqCstFence(Instruction *AtomicI) {
309 return false;
310
311 IRBuilder Builder(AtomicI);
312 if (auto *TrailingFence = TLI->emitTrailingFence(
313 Builder, AtomicI, AtomicOrdering::SequentiallyConsistent)) {
314 TrailingFence->moveAfter(AtomicI);
315 return true;
316 }
317 return false;
318}
319
320template <typename AtomicInst>
321bool AtomicExpandImpl::tryInsertFencesForAtomic(AtomicInst *AtomicI,
322 bool OrderingRequiresFence,
323 AtomicOrdering NewOrdering) {
324 bool ShouldInsertFences = TLI->shouldInsertFencesForAtomic(AtomicI);
325 if (OrderingRequiresFence && ShouldInsertFences) {
326 AtomicOrdering FenceOrdering = AtomicI->getOrdering();
327 AtomicI->setOrdering(NewOrdering);
328 return bracketInstWithFences(AtomicI, FenceOrdering);
329 }
330 if (!ShouldInsertFences)
331 return tryInsertTrailingSeqCstFence(AtomicI);
332 return false;
333}
334
335bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
336 if (auto *LI = dyn_cast<LoadInst>(I)) {
337 if (!LI->isAtomic())
338 return false;
339
340 if (!atomicSizeSupported(TLI, LI)) {
341 expandAtomicLoadToLibcall(LI);
342 return true;
343 }
344
345 bool MadeChange = false;
346 if (TLI->shouldCastAtomicLoadInIR(LI) ==
347 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
348 LI = convertAtomicLoadToIntegerType(LI);
349 MadeChange = true;
350 }
351
352 MadeChange |= tryInsertFencesForAtomic(
353 LI, isAcquireOrStronger(LI->getOrdering()), AtomicOrdering::Monotonic);
354
355 MadeChange |= tryExpandAtomicLoad(LI);
356 return MadeChange;
357 }
358
359 if (auto *SI = dyn_cast<StoreInst>(I)) {
360 if (!SI->isAtomic())
361 return false;
362
363 if (!atomicSizeSupported(TLI, SI)) {
364 expandAtomicStoreToLibcall(SI);
365 return true;
366 }
367
368 bool MadeChange = false;
369 if (TLI->shouldCastAtomicStoreInIR(SI) ==
370 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
371 SI = convertAtomicStoreToIntegerType(SI);
372 MadeChange = true;
373 }
374
375 MadeChange |= tryInsertFencesForAtomic(
376 SI, isReleaseOrStronger(SI->getOrdering()), AtomicOrdering::Monotonic);
377
378 MadeChange |= tryExpandAtomicStore(SI);
379 return MadeChange;
380 }
381
382 if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) {
383 if (!atomicSizeSupported(TLI, RMWI)) {
384 expandAtomicRMWToLibcall(RMWI);
385 return true;
386 }
387
388 bool MadeChange = false;
389 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
390 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
391 RMWI = convertAtomicXchgToIntegerType(RMWI);
392 MadeChange = true;
393 }
394
395 MadeChange |= tryInsertFencesForAtomic(
396 RMWI,
397 isReleaseOrStronger(RMWI->getOrdering()) ||
398 isAcquireOrStronger(RMWI->getOrdering()),
400
401 // There are two different ways of expanding RMW instructions:
402 // - into a load if it is idempotent
403 // - into a Cmpxchg/LL-SC loop otherwise
404 // we try them in that order.
405 MadeChange |= (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
406 tryExpandAtomicRMW(RMWI);
407 return MadeChange;
408 }
409
410 if (auto *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
411 if (!atomicSizeSupported(TLI, CASI)) {
412 expandAtomicCASToLibcall(CASI);
413 return true;
414 }
415
416 // TODO: when we're ready to make the change at the IR level, we can
417 // extend convertCmpXchgToInteger for floating point too.
418 bool MadeChange = false;
419 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
420 // TODO: add a TLI hook to control this so that each target can
421 // convert to lowering the original type one at a time.
422 CASI = convertCmpXchgToIntegerType(CASI);
423 MadeChange = true;
424 }
425
426 auto CmpXchgExpansion = TLI->shouldExpandAtomicCmpXchgInIR(CASI);
427 if (TLI->shouldInsertFencesForAtomic(CASI)) {
428 if (CmpXchgExpansion == TargetLoweringBase::AtomicExpansionKind::None &&
429 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
430 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
431 isAcquireOrStronger(CASI->getFailureOrdering()))) {
432 // If a compare and swap is lowered to LL/SC, we can do smarter fence
433 // insertion, with a stronger one on the success path than on the
434 // failure path. As a result, fence insertion is directly done by
435 // expandAtomicCmpXchg in that case.
436 AtomicOrdering FenceOrdering = CASI->getMergedOrdering();
437 AtomicOrdering CASOrdering =
439 CASI->setSuccessOrdering(CASOrdering);
440 CASI->setFailureOrdering(CASOrdering);
441 MadeChange |= bracketInstWithFences(CASI, FenceOrdering);
442 }
443 } else if (CmpXchgExpansion !=
444 TargetLoweringBase::AtomicExpansionKind::LLSC) {
445 // CmpXchg LLSC is handled in expandAtomicCmpXchg().
446 MadeChange |= tryInsertTrailingSeqCstFence(CASI);
447 }
448
449 MadeChange |= tryExpandAtomicCmpXchg(CASI);
450 return MadeChange;
451 }
452
453 return false;
454}
455
456bool AtomicExpandImpl::run(
457 Function &F, const LibcallLoweringModuleAnalysisResult &LibcallResult,
458 const TargetMachine *TM) {
459 const auto *Subtarget = TM->getSubtargetImpl(F);
460 if (!Subtarget->enableAtomicExpand())
461 return false;
462 TLI = Subtarget->getTargetLowering();
463 LibcallLowering = &LibcallResult.getLibcallLowering(*Subtarget);
464 DL = &F.getDataLayout();
465
466 bool MadeChange = false;
467
468 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
469 BasicBlock *BB = &*BBI;
470
472
473 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
474 I = Next) {
475 Instruction &Inst = *I;
476 Next = std::next(I);
477
478 if (processAtomicInstr(&Inst)) {
479 MadeChange = true;
480
481 // New blocks may have been inserted.
482 BBE = F.end();
483 }
484 }
485 }
486
487 return MadeChange;
488}
489
490bool AtomicExpandLegacy::runOnFunction(Function &F) {
491
492 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
493 if (!TPC)
494 return false;
495 auto *TM = &TPC->getTM<TargetMachine>();
496
497 const LibcallLoweringModuleAnalysisResult &LibcallResult =
498 getAnalysis<LibcallLoweringInfoWrapper>().getResult(*F.getParent());
499 AtomicExpandImpl AE;
500 return AE.run(F, LibcallResult, TM);
501}
502
504 return new AtomicExpandLegacy();
505}
506
509 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
510
511 const LibcallLoweringModuleAnalysisResult *LibcallResult =
512 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
513
514 if (!LibcallResult) {
515 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
516 "' analysis required");
517 return PreservedAnalyses::all();
518 }
519
520 AtomicExpandImpl AE;
521
522 bool Changed = AE.run(F, *LibcallResult, TM);
523 if (!Changed)
524 return PreservedAnalyses::all();
525
527}
528
529bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
530 AtomicOrdering Order) {
531 ReplacementIRBuilder Builder(I, *DL);
532
533 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
534
535 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
536 // We have a guard here because not every atomic operation generates a
537 // trailing fence.
538 if (TrailingFence)
539 TrailingFence->moveAfter(I);
540
541 return (LeadingFence || TrailingFence);
542}
543
544/// Get the iX type with the same bitwidth as T.
546AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
547 EVT VT = TLI->getMemValueType(DL, T);
548 unsigned BitWidth = VT.getStoreSizeInBits();
549 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
550 return IntegerType::get(T->getContext(), BitWidth);
551}
552
553/// Convert an atomic load of a non-integral type to an integer load of the
554/// equivalent bitwidth. See the function comment on
555/// convertAtomicStoreToIntegerType for background.
556LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
557 auto *M = LI->getModule();
558 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
559
560 ReplacementIRBuilder Builder(LI, *DL);
561
562 Value *Addr = LI->getPointerOperand();
563
564 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
565 NewLI->setAlignment(LI->getAlign());
566 NewLI->setVolatile(LI->isVolatile());
567 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
568 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
569
570 Value *NewVal = LI->getType()->isPtrOrPtrVectorTy()
571 ? Builder.CreateIntToPtr(NewLI, LI->getType())
572 : Builder.CreateBitCast(NewLI, LI->getType());
573 LI->replaceAllUsesWith(NewVal);
574 LI->eraseFromParent();
575 return NewLI;
576}
577
578AtomicRMWInst *
579AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
581
582 auto *M = RMWI->getModule();
583 Type *NewTy =
584 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
585
586 ReplacementIRBuilder Builder(RMWI, *DL);
587
588 Value *Addr = RMWI->getPointerOperand();
589 Value *Val = RMWI->getValOperand();
590 Value *NewVal = Val->getType()->isPointerTy()
591 ? Builder.CreatePtrToInt(Val, NewTy)
592 : Builder.CreateBitCast(Val, NewTy);
593
594 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
595 RMWI->getAlign(), RMWI->getOrdering(),
596 RMWI->getSyncScopeID());
597 NewRMWI->setVolatile(RMWI->isVolatile());
598 copyMetadataForAtomic(*NewRMWI, *RMWI);
599 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
600
601 Value *NewRVal = RMWI->getType()->isPointerTy()
602 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
603 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
604 RMWI->replaceAllUsesWith(NewRVal);
605 RMWI->eraseFromParent();
606 return NewRMWI;
607}
608
609bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
610 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
611 case TargetLoweringBase::AtomicExpansionKind::None:
612 return false;
613 case TargetLoweringBase::AtomicExpansionKind::LLSC:
614 expandAtomicOpToLLSC(
615 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
616 LI->getOrdering(),
617 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
618 return true;
619 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
620 return expandAtomicLoadToLL(LI);
621 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
622 return expandAtomicLoadToCmpXchg(LI);
623 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
624 LI->setAtomic(AtomicOrdering::NotAtomic);
625 return true;
626 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
627 TLI->emitExpandAtomicLoad(LI);
628 return true;
629 default:
630 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
631 }
632}
633
634bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
635 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
636 case TargetLoweringBase::AtomicExpansionKind::None:
637 return false;
638 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
639 TLI->emitExpandAtomicStore(SI);
640 return true;
641 case TargetLoweringBase::AtomicExpansionKind::Expand:
642 expandAtomicStoreToXChg(SI);
643 return true;
644 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
645 SI->setAtomic(AtomicOrdering::NotAtomic);
646 return true;
647 default:
648 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
649 }
650}
651
652bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
653 ReplacementIRBuilder Builder(LI, *DL);
654
655 // On some architectures, load-linked instructions are atomic for larger
656 // sizes than normal loads. For example, the only 64-bit load guaranteed
657 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
658 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
659 LI->getPointerOperand(), LI->getOrdering());
661
662 LI->replaceAllUsesWith(Val);
663 LI->eraseFromParent();
664
665 return true;
666}
667
668bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
669 ReplacementIRBuilder Builder(LI, *DL);
670 AtomicOrdering Order = LI->getOrdering();
671 if (Order == AtomicOrdering::Unordered)
672 Order = AtomicOrdering::Monotonic;
673
674 Value *Addr = LI->getPointerOperand();
675 Type *Ty = LI->getType();
676
677 // cmpxchg supports only integer and pointer operands. If the load type is
678 // FP or vector, run the cmpxchg on the same-sized integer and bitcast the
679 // result back; mirrors createCmpXchgInstFun.
680 bool NeedBitcast = Ty->isFloatingPointTy() || Ty->isVectorTy();
681 Type *CmpXchgTy = Ty;
682 if (NeedBitcast)
683 CmpXchgTy = Builder.getIntNTy(Ty->getPrimitiveSizeInBits());
684 Constant *DummyVal = Constant::getNullValue(CmpXchgTy);
685
686 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
687 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
689 LI->getSyncScopeID());
690 Pair->setVolatile(LI->isVolatile());
691 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
692 if (NeedBitcast)
693 Loaded = Builder.CreateBitCast(Loaded, Ty);
694
695 LI->replaceAllUsesWith(Loaded);
696 LI->eraseFromParent();
697
698 return true;
699}
700
701/// Convert an atomic store of a non-integral type to an integer store of the
702/// equivalent bitwidth. We used to not support floating point or vector
703/// atomics in the IR at all. The backends learned to deal with the bitcast
704/// idiom because that was the only way of expressing the notion of a atomic
705/// float or vector store. The long term plan is to teach each backend to
706/// instruction select from the original atomic store, but as a migration
707/// mechanism, we convert back to the old format which the backends understand.
708/// Each backend will need individual work to recognize the new format.
709StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
710 ReplacementIRBuilder Builder(SI, *DL);
711 auto *M = SI->getModule();
712 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
713 M->getDataLayout());
714 Value *NewVal = SI->getValueOperand()->getType()->isPtrOrPtrVectorTy()
715 ? Builder.CreatePtrToInt(SI->getValueOperand(), NewTy)
716 : Builder.CreateBitCast(SI->getValueOperand(), NewTy);
717
718 Value *Addr = SI->getPointerOperand();
719
720 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
721 NewSI->setAlignment(SI->getAlign());
722 NewSI->setVolatile(SI->isVolatile());
723 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
724 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
725 SI->eraseFromParent();
726 return NewSI;
727}
728
729void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
730 // This function is only called on atomic stores that are too large to be
731 // atomic if implemented as a native store. So we replace them by an
732 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
733 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
734 // It is the responsibility of the target to only signal expansion via
735 // shouldExpandAtomicRMW in cases where this is required and possible.
736 ReplacementIRBuilder Builder(SI, *DL);
737 AtomicOrdering Ordering = SI->getOrdering();
738 assert(Ordering != AtomicOrdering::NotAtomic);
739 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
740 ? AtomicOrdering::Monotonic
741 : Ordering;
742 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
743 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
744 SI->getAlign(), RMWOrdering, SI->getSyncScopeID());
745 AI->setVolatile(SI->isVolatile());
746 SI->eraseFromParent();
747
748 // Now we have an appropriate swap instruction, lower it as usual.
749 tryExpandAtomicRMW(AI);
750}
751
752static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
753 Value *Loaded, Value *NewVal, Align AddrAlign,
754 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
755 bool IsVolatile, Value *&Success,
756 Value *&NewLoaded, Instruction *MetadataSrc) {
757 Type *OrigTy = NewVal->getType();
758
759 // This code can go away when cmpxchg supports FP and vector types.
760 assert(!OrigTy->isPointerTy());
761 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
762 if (NeedBitcast) {
763 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
764 NewVal = Builder.CreateBitCast(NewVal, IntTy);
765 Loaded = Builder.CreateBitCast(Loaded, IntTy);
766 }
767
768 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
769 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
771 Pair->setVolatile(IsVolatile);
772 if (MetadataSrc)
773 copyMetadataForAtomic(*Pair, *MetadataSrc);
774
775 Success = Builder.CreateExtractValue(Pair, 1, "success");
776 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
777
778 if (NeedBitcast)
779 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
780}
781
782bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
783 LLVMContext &Ctx = AI->getModule()->getContext();
784 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
785 switch (Kind) {
786 case TargetLoweringBase::AtomicExpansionKind::None:
787 return false;
788 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
789 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
790 unsigned ValueSize = getAtomicOpSize(AI);
791 if (ValueSize < MinCASSize) {
792 expandPartwordAtomicRMW(AI,
793 TargetLoweringBase::AtomicExpansionKind::LLSC);
794 } else {
795 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
796 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
797 AI->getValOperand());
798 };
799 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
800 AI->getAlign(), AI->getOrdering(), PerformOp);
801 }
802 return true;
803 }
804 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
805 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
806 unsigned ValueSize = getAtomicOpSize(AI);
807 if (ValueSize < MinCASSize) {
808 expandPartwordAtomicRMW(AI,
809 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
810 } else {
812 Ctx.getSyncScopeNames(SSNs);
813 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
814 ? "system"
815 : SSNs[AI->getSyncScopeID()];
816 OptimizationRemarkEmitter ORE(AI->getFunction());
817 ORE.emit([&]() {
818 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
819 << "A compare and swap loop was generated for an atomic "
820 << AI->getOperationName(AI->getOperation()) << " operation at "
821 << MemScope << " memory scope";
822 });
823 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
824 }
825 return true;
826 }
827 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
828 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
829 unsigned ValueSize = getAtomicOpSize(AI);
830 if (ValueSize < MinCASSize) {
832 // Widen And/Or/Xor and give the target another chance at expanding it.
835 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
836 return true;
837 }
838 }
839 expandAtomicRMWToMaskedIntrinsic(AI);
840 return true;
841 }
842 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
844 return true;
845 }
846 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
848 return true;
849 }
850 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
851 return lowerAtomicRMWInst(AI);
852 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
853 TLI->emitExpandAtomicRMW(AI);
854 return true;
855 default:
856 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
857 }
858}
859
860namespace {
861
862struct PartwordMaskValues {
863 // These three fields are guaranteed to be set by createMaskInstrs.
864 Type *WordType = nullptr;
865 Type *ValueType = nullptr;
866 Type *IntValueType = nullptr;
867 Value *AlignedAddr = nullptr;
868 Align AlignedAddrAlignment;
869 // The remaining fields can be null.
870 Value *ShiftAmt = nullptr;
871 Value *Mask = nullptr;
872 Value *Inv_Mask = nullptr;
873};
874
875[[maybe_unused]]
876raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
877 auto PrintObj = [&O](auto *V) {
878 if (V)
879 O << *V;
880 else
881 O << "nullptr";
882 O << '\n';
883 };
884 O << "PartwordMaskValues {\n";
885 O << " WordType: ";
886 PrintObj(PMV.WordType);
887 O << " ValueType: ";
888 PrintObj(PMV.ValueType);
889 O << " AlignedAddr: ";
890 PrintObj(PMV.AlignedAddr);
891 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
892 O << " ShiftAmt: ";
893 PrintObj(PMV.ShiftAmt);
894 O << " Mask: ";
895 PrintObj(PMV.Mask);
896 O << " Inv_Mask: ";
897 PrintObj(PMV.Inv_Mask);
898 O << "}\n";
899 return O;
900}
901
902} // end anonymous namespace
903
/// This is a helper function which builds instructions to provide
/// values necessary for partword atomic operations. It takes an
/// incoming address, Addr, and ValueType, and constructs the address,
/// shift-amounts and masks needed to work with a larger value of size
/// MinWordSize.
///
/// AlignedAddr:    Addr rounded down to a multiple of MinWordSize
///
/// ShiftAmt:       Number of bits to right-shift a MinWordSize value loaded
///                 from AlignedAddr for it to have the same value as if
///                 ValueType was loaded from Addr.
///
/// Mask:           Value to mask with the value loaded from AlignedAddr to
///                 include only the part that would've been loaded from Addr.
///
/// Inv_Mask:       The inverse of Mask.
920static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
922 Value *Addr, Align AddrAlign,
923 unsigned MinWordSize) {
924 PartwordMaskValues PMV;
925
926 Module *M = I->getModule();
927 LLVMContext &Ctx = M->getContext();
928 const DataLayout &DL = M->getDataLayout();
929 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
930
931 PMV.ValueType = PMV.IntValueType = ValueType;
932 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
933 PMV.IntValueType =
934 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
935
936 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
937 : ValueType;
938 if (PMV.ValueType == PMV.WordType) {
939 PMV.AlignedAddr = Addr;
940 PMV.AlignedAddrAlignment = AddrAlign;
941 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
942 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
943 return PMV;
944 }
945
946 PMV.AlignedAddrAlignment = Align(MinWordSize);
947
948 assert(ValueSize < MinWordSize);
949
950 PointerType *PtrTy = cast<PointerType>(Addr->getType());
951 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
952 Value *PtrLSB;
953
954 if (AddrAlign < MinWordSize) {
955 PMV.AlignedAddr = Builder.CreateIntrinsic(
956 Intrinsic::ptrmask, {PtrTy, IntTy},
957 {Addr, ConstantInt::getSigned(IntTy, ~(uint64_t)(MinWordSize - 1))},
958 nullptr, "AlignedAddr");
959
960 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
961 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
962 } else {
963 // If the alignment is high enough, the LSB are known 0.
964 PMV.AlignedAddr = Addr;
965 PtrLSB = ConstantInt::getNullValue(IntTy);
966 }
967
968 if (DL.isLittleEndian()) {
969 // turn bytes into bits
970 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
971 } else {
972 // turn bytes into bits, and count from the other side.
973 PMV.ShiftAmt = Builder.CreateShl(
974 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
975 }
976
977 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
978 PMV.Mask = Builder.CreateShl(
979 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
980 "Mask");
981
982 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
983
984 return PMV;
985}
986
987static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
988 const PartwordMaskValues &PMV) {
989 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
990 if (PMV.WordType == PMV.ValueType)
991 return WideWord;
992
993 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
994 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
995 return Builder.CreateBitCast(Trunc, PMV.ValueType);
996}
997
998static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
999 Value *Updated, const PartwordMaskValues &PMV) {
1000 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
1001 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
1002 if (PMV.WordType == PMV.ValueType)
1003 return Updated;
1004
1005 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
1006
1007 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
1008 Value *Shift =
1009 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
1010 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
1011 Value *Or = Builder.CreateOr(And, Shift, "inserted");
1012 return Or;
1013}
1014
1015/// Emit IR to implement a masked version of a given atomicrmw
1016/// operation. (That is, only the bits under the Mask should be
1017/// affected by the operation)
///
/// Loaded is a word-sized value (PMV.WordType). Shifted_Inc is the operand
/// used by the cases that work on the whole word (Xchg, Add, Sub, Nand); Inc
/// is the operand used by the cases that first extract the narrow value.
/// The returned value is the new full word.
///
/// NOTE(review): the first line of the signature and several case labels of
/// the last group are not visible in this listing - confirm against the
/// complete file.
1019 IRBuilderBase &Builder, Value *Loaded,
1020 Value *Shifted_Inc, Value *Inc,
1021 const PartwordMaskValues &PMV) {
1022 // TODO: update to use
1023 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
1024 // to merge bits from two values without requiring PMV.Inv_Mask.
1025 switch (Op) {
1026 case AtomicRMWInst::Xchg: {
 // Keep the bits outside the mask and OR in the shifted operand.
1027 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
1028 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
1029 return FinalVal;
1030 }
1031 case AtomicRMWInst::Or:
1032 case AtomicRMWInst::Xor:
1033 case AtomicRMWInst::And:
1034 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
1035 case AtomicRMWInst::Add:
1036 case AtomicRMWInst::Sub:
1037 case AtomicRMWInst::Nand: {
1038 // The other arithmetic ops need to be masked into place.
 // The operation is applied to the whole word; only the bits under
 // PMV.Mask of its result are merged back into the loaded word.
1039 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
1040 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
1041 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
1042 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
1043 return FinalVal;
1044 }
1045 case AtomicRMWInst::Max:
1046 case AtomicRMWInst::Min:
1061 // Finally, other ops will operate on the full value, so truncate down to
1062 // the original size, and expand out again after doing the
1063 // operation. Bitcasts will be inserted for FP values.
1064 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
1065 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
1066 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
1067 return FinalVal;
1068 }
1069 default:
1070 llvm_unreachable("Unknown atomic op");
1071 }
1072}
1073
1074/// Expand a sub-word atomicrmw operation into an appropriate
1075/// word-sized operation.
1076///
1077/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
1078/// way as a typical atomicrmw expansion. The only difference here is
1079/// that the operation inside of the loop may operate upon only a
1080/// part of the value.
///
/// ExpansionKind selects the loop form: CmpXChg uses insertRMWCmpXchgLoop,
/// anything else is asserted to be LLSC and uses insertRMWLLSCLoop. AI is
/// replaced by the extracted old value and erased.
///
/// NOTE(review): the declaration of `Op`, the conditions guarding the two
/// braced regions below and the tail of the insertRMWCmpXchgLoop call are not
/// visible in this listing - confirm against the complete file.
1081void AtomicExpandImpl::expandPartwordAtomicRMW(
1082 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
1083 // Widen And/Or/Xor and give the target another chance at expanding it.
1087 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
1088 return;
1089 }
1090 AtomicOrdering MemOpOrder = AI->getOrdering();
1091 SyncScope::ID SSID = AI->getSyncScopeID();
1092
1093 ReplacementIRBuilder Builder(AI, *DL);
1094
 // The word size handed to createMaskInstrs is in bytes, hence the / 8.
1095 PartwordMaskValues PMV =
1096 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1097 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1098
1099 Value *ValOperand_Shifted = nullptr;
 // Pre-compute the operand zero-extended to the word type and shifted into
 // its position inside the word.
1102 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
1103 ValOperand_Shifted =
1104 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
1105 "ValOperand_Shifted");
1106 }
1107
 // Loop body callback: maps the loaded word to the word to store back.
1108 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
1109 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
1110 AI->getValOperand(), PMV);
1111 };
1112
1113 Value *OldResult;
1114 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
1115 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1116 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
1117 AI->isVolatile(), PerformPartwordOp,
1119 } else {
1120 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
1121 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1122 PMV.AlignedAddrAlignment, MemOpOrder,
1123 PerformPartwordOp);
1124 }
1125
 // The loop yields the old full word; narrow it back to the original type.
1126 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1127 AI->replaceAllUsesWith(FinalOldResult);
1128 AI->eraseFromParent();
1129}
1130
1131// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
// A new word-sized atomicrmw on the aligned address replaces AI; AI's users
// are redirected to the value extracted from the new instruction's result,
// AI is erased, and the new instruction is returned.
//
// NOTE(review): the declaration of `Op` and the opening of the assert are not
// visible in this listing - confirm against the complete file.
1132AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
1133 ReplacementIRBuilder Builder(AI, *DL);
1135
1137 Op == AtomicRMWInst::And) &&
1138 "Unable to widen operation");
1139
1140 PartwordMaskValues PMV =
1141 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1142 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1143
 // Zero-extend and shift the operand into place; every bit outside the
 // value's slot is zero afterwards.
1144 Value *ValOperand_Shifted =
1145 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
1146 PMV.ShiftAmt, "ValOperand_Shifted");
1147
1148 Value *NewOperand;
1149
 // For And, the bits outside the slot are set to one (OR with Inv_Mask) so
 // that and-ing leaves the rest of the word unchanged. For the other
 // operations the zero bits already have that property.
1150 if (Op == AtomicRMWInst::And)
1151 NewOperand =
1152 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
1153 else
1154 NewOperand = ValOperand_Shifted;
1155
1156 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
1157 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1158 AI->getOrdering(), AI->getSyncScopeID());
1159
 // Carry over volatility and metadata from the instruction being replaced.
1160 NewAI->setVolatile(AI->isVolatile());
1161 copyMetadataForAtomic(*NewAI, *AI);
1162
1163 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
1164 AI->replaceAllUsesWith(FinalOldResult);
1165 AI->eraseFromParent();
1166 return NewAI;
1167}
1168
/// Expand a cmpxchg on a value narrower than the minimum cmpxchg width into a
/// word-sized cmpxchg on the aligned address. CI is replaced by a
/// { value, success } aggregate built from the wide result and then erased.
/// Always returns true.
///
/// NOTE(review): the condition opening the block around setAtomic /
/// processAtomicInstr and the trailing arguments of CreateAtomicCmpXchg are
/// not visible in this listing - confirm against the complete file.
1169bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1170 // The basic idea here is that we're expanding a cmpxchg of a
1171 // smaller memory size up to a word-sized cmpxchg. To do this, we
1172 // need to add a retry-loop for strong cmpxchg, so that
1173 // modifications to other parts of the word don't cause a spurious
1174 // failure.
1175
1176 // This generates code like the following:
1177 // [[Setup mask values PMV.*]]
1178 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1179 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1180 // %InitLoaded = load i32* %addr
1181 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1182 // br partword.cmpxchg.loop
1183 // partword.cmpxchg.loop:
1184 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1185 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1186 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1187 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1188 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
1189 // i32 %FullWord_NewVal success_ordering failure_ordering
1190 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
1191 // %Success = extractvalue { i32, i1 } %NewCI, 1
1192 // br i1 %Success, label %partword.cmpxchg.end,
1193 // label %partword.cmpxchg.failure
1194 // partword.cmpxchg.failure:
1195 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1196 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1197 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1198 // label %partword.cmpxchg.end
1199 // partword.cmpxchg.end:
1200 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1201 // %FinalOldVal = trunc i32 %tmp1 to i8
1202 // %tmp2 = insertvalue { i8, i1 } poison, i8 %FinalOldVal, 0
1203 // %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
1204
1205 Value *Addr = CI->getPointerOperand();
1206 Value *Cmp = CI->getCompareOperand();
1207 Value *NewVal = CI->getNewValOperand();
1208
1209 BasicBlock *BB = CI->getParent();
1210 Function *F = BB->getParent();
1211 ReplacementIRBuilder Builder(CI, *DL);
1212 LLVMContext &Ctx = Builder.getContext();
1213
 // CI becomes the first instruction of EndBB; the loop and failure blocks
 // are inserted between BB and EndBB.
1214 BasicBlock *EndBB =
1215 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1216 auto FailureBB =
1217 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1218 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1219
1220 // The split call above "helpfully" added a branch at the end of BB
1221 // (to the wrong place).
1222 std::prev(BB->end())->eraseFromParent();
1223 Builder.SetInsertPoint(BB);
1224
1225 PartwordMaskValues PMV =
1226 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1227 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1228
1229 // Shift the incoming values over, into the right location in the word.
1230 Value *NewVal_Shifted =
1231 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1232 Value *Cmp_Shifted =
1233 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1234
1235 // Load the entire current word, and mask into place the expected and new
1236 // values
1237 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1238 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1239 Builder.CreateBr(LoopBB);
1240
1241 // partword.cmpxchg.loop:
1242 Builder.SetInsertPoint(LoopBB);
1243 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1244 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1245
1246 // The initial load must be atomic with the same synchronization scope
1247 // to avoid a data race with concurrent stores. If the instruction being
1248 // emulated is volatile, issue a volatile load.
1249 // addIncoming is done first so that any replaceAllUsesWith calls during
1250 // normalization correctly update the PHI incoming value.
1251 InitLoaded->setVolatile(CI->isVolatile());
1253 InitLoaded->setAtomic(AtomicOrdering::Monotonic, CI->getSyncScopeID());
1254 // The newly created load might need to be lowered further. Because it is
1255 // created in the same block as the cmpxchg, the AtomicExpand loop will
1256 // not process it again.
1257 processAtomicInstr(InitLoaded);
1258 }
1259
1260 // Mask/Or the expected and new values into place in the loaded word.
1261 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1262 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1263 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1264 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1266 NewCI->setVolatile(CI->isVolatile());
1267 // When we're building a strong cmpxchg, we need a loop, so you
1268 // might think we could use a weak cmpxchg inside. But, using strong
1269 // allows the below comparison for ShouldContinue, and we're
1270 // expecting the underlying cmpxchg to be a machine instruction,
1271 // which is strong anyways.
1272 NewCI->setWeak(CI->isWeak());
1273
1274 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1275 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1276
 // A weak cmpxchg never retries: it goes straight to the end block.
1277 if (CI->isWeak())
1278 Builder.CreateBr(EndBB);
1279 else
1280 Builder.CreateCondBr(Success, EndBB, FailureBB);
1281
1282 // partword.cmpxchg.failure:
1283 Builder.SetInsertPoint(FailureBB);
1284 // Upon failure, verify that the masked-out part of the loaded value
1285 // has been modified. If it didn't, abort the cmpxchg, since the
1286 // masked-in part must've.
1287 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1288 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1289 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1290
1291 // Add the second value to the phi from above
1292 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1293
1294 // partword.cmpxchg.end:
1295 Builder.SetInsertPoint(CI);
1296
 // Rebuild the { value, success } result at the original width.
1297 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1298 Value *Res = PoisonValue::get(CI->getType());
1299 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1300 Res = Builder.CreateInsertValue(Res, Success, 1);
1301
1302 CI->replaceAllUsesWith(Res);
1303 CI->eraseFromParent();
1304 return true;
1305}
1306
1307void AtomicExpandImpl::expandAtomicOpToLLSC(
1308 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1309 AtomicOrdering MemOpOrder,
1310 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1311 ReplacementIRBuilder Builder(I, *DL);
1312 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1313 MemOpOrder, PerformOp);
1314
1315 I->replaceAllUsesWith(Loaded);
1316 I->eraseFromParent();
1317}
1318
1319void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1320 ReplacementIRBuilder Builder(AI, *DL);
1321
1322 PartwordMaskValues PMV =
1323 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1324 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1325
1326 // The value operand must be sign-extended for signed min/max so that the
1327 // target's signed comparison instructions can be used. Otherwise, just
1328 // zero-ext.
1329 Instruction::CastOps CastOp = Instruction::ZExt;
1330 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1331 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1332 CastOp = Instruction::SExt;
1333
1334 Value *ValOperand_Shifted = Builder.CreateShl(
1335 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1336 PMV.ShiftAmt, "ValOperand_Shifted");
1337 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1338 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1339 AI->getOrdering());
1340 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1341 AI->replaceAllUsesWith(FinalOldResult);
1342 AI->eraseFromParent();
1343}
1344
/// Lower a partword cmpxchg through a target hook that works on the
/// containing word. The compare and new values are zero-extended and shifted
/// into place, and the { value, success } result is rebuilt at the original
/// width before CI is replaced and erased.
///
/// NOTE(review): the line declaring `OldVal` and naming the target hook is
/// not visible in this listing - confirm against the complete file.
1345void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1346 AtomicCmpXchgInst *CI) {
1347 ReplacementIRBuilder Builder(CI, *DL);
1348
1349 PartwordMaskValues PMV = createMaskInstrs(
1350 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1351 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1352
1353 Value *CmpVal_Shifted = Builder.CreateShl(
1354 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1355 "CmpVal_Shifted");
1356 Value *NewVal_Shifted = Builder.CreateShl(
1357 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1358 "NewVal_Shifted");
1360 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1361 CI->getMergedOrdering());
1362 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1363 Value *Res = PoisonValue::get(CI->getType());
1364 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
 // Success is recomputed here: the masked old word must equal the shifted
 // compare value.
1365 Value *Success = Builder.CreateICmpEQ(
1366 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1367 Res = Builder.CreateInsertValue(Res, Success, 1);
1368
1369 CI->replaceAllUsesWith(Res);
1370 CI->eraseFromParent();
1371}
1372
/// Split the current block at the builder's insertion point and emit a
/// load-linked / PerformOp / store-conditional loop in between. On return the
/// builder is positioned at the start of the exit block. Returns the value
/// produced by the load-linked.
///
/// NOTE(review): the statement that applies the "unknown" branch weights to
/// CondBr is not visible in this listing - confirm against the complete file.
1373Value *AtomicExpandImpl::insertRMWLLSCLoop(
1374 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1375 AtomicOrdering MemOpOrder,
1376 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1377 LLVMContext &Ctx = Builder.getContext();
1378 BasicBlock *BB = Builder.GetInsertBlock();
1379 Function *F = BB->getParent();
1380
1381 assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
1382 "Expected at least natural alignment at this point.");
1383
1384 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1385 //
1386 // The standard expansion we produce is:
1387 // [...]
1388 // atomicrmw.start:
1389 // %loaded = @load.linked(%addr)
1390 // %new = some_op iN %loaded, %incr
1391 // %stored = @store_conditional(%new, %addr)
1392 // %try_again = icmp i32 ne %stored, 0
1393 // br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
1394 // atomicrmw.end:
1395 // [...]
1396 BasicBlock *ExitBB =
1397 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1398 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1399
1400 // The split call above "helpfully" added a branch at the end of BB (to the
1401 // wrong place).
1402 std::prev(BB->end())->eraseFromParent();
1403 Builder.SetInsertPoint(BB);
1404 Builder.CreateBr(LoopBB);
1405
1406 // Start the main loop block now that we've taken care of the preliminaries.
1407 Builder.SetInsertPoint(LoopBB);
1408 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1409
1410 Value *NewVal = PerformOp(Builder, Loaded);
1411
 // A non-zero store-conditional result sends control back to the loop head.
1412 Value *StoreSuccess =
1413 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1414 Value *TryAgain = Builder.CreateICmpNE(
1415 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1416
1417 Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1418
1419 // Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is
1420 // hard to predict precise branch weights we mark the branch as "unknown"
1421 // (50/50) to prevent misleading optimizations.
1423
1424 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1425 return Loaded;
1426}
1427
1428/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1429/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1430/// IR. As a migration step, we convert back to what use to be the standard
1431/// way to represent a pointer cmpxchg so that we can update backends one by
1432/// one.
1433AtomicCmpXchgInst *
1434AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1435 auto *M = CI->getModule();
1436 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1437 M->getDataLayout());
1438
1439 ReplacementIRBuilder Builder(CI, *DL);
1440
1441 Value *Addr = CI->getPointerOperand();
1442
1443 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1444 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1445
1446 auto *NewCI = Builder.CreateAtomicCmpXchg(
1447 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1448 CI->getFailureOrdering(), CI->getSyncScopeID());
1449 NewCI->setVolatile(CI->isVolatile());
1450 NewCI->setWeak(CI->isWeak());
1451 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1452
1453 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1454 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1455
1456 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1457
1458 Value *Res = PoisonValue::get(CI->getType());
1459 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1460 Res = Builder.CreateInsertValue(Res, Succ, 1);
1461
1462 CI->replaceAllUsesWith(Res);
1463 CI->eraseFromParent();
1464 return NewCI;
1465}
1466
/// Expand a cmpxchg into a load-linked / store-conditional control-flow
/// graph, optionally surrounded by target fences. CI's extractvalue users are
/// rewired to the CFG-derived loaded value and success flag, any remaining
/// use gets a rebuilt aggregate, and CI is erased. Always returns true.
///
/// NOTE(review): several lines are not visible in this listing (the second
/// half of the trailing-fence condition, the load-linked balancing call, the
/// declaration of PrunedInsts and the replacement of index-1 extracts) -
/// confirm against the complete file.
1467bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1468 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1469 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1470 Value *Addr = CI->getPointerOperand();
1471 BasicBlock *BB = CI->getParent();
1472 Function *F = BB->getParent();
1473 LLVMContext &Ctx = F->getContext();
1474 // If shouldInsertFencesForAtomic() returns true, then the target does not
1475 // want to deal with memory orders, and emitLeading/TrailingFence should take
1476 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1477 // should preserve the ordering.
1478 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1479 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1480 ? AtomicOrdering::Monotonic
1481 : CI->getMergedOrdering();
1482
1483 // In implementations which use a barrier to achieve release semantics, we can
1484 // delay emitting this barrier until we know a store is actually going to be
1485 // attempted. The cost of this delay is that we need 2 copies of the block
1486 // emitting the load-linked, affecting code size.
1487 //
1488 // Ideally, this logic would be unconditional except for the minsize check
1489 // since in other cases the extra blocks naturally collapse down to the
1490 // minimal loop. Unfortunately, this puts too much stress on later
1491 // optimisations so we avoid emitting the extra logic in those cases too.
1492 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1493 SuccessOrder != AtomicOrdering::Monotonic &&
1494 SuccessOrder != AtomicOrdering::Acquire &&
1495 !F->hasMinSize();
1496
1497 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1498 // do it even on minsize.
1499 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1500
1501 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1502 //
1503 // The full expansion we produce is:
1504 // [...]
1505 // %aligned.addr = ...
1506 // cmpxchg.start:
1507 // %unreleasedload = @load.linked(%aligned.addr)
1508 // %unreleasedload.extract = extract value from %unreleasedload
1509 // %should_store = icmp eq %unreleasedload.extract, %desired
1510 // br i1 %should_store, label %cmpxchg.fencedstore,
1511 // label %cmpxchg.nostore
1512 // cmpxchg.fencedstore:
1513 // fence?
1514 // br label cmpxchg.trystore
1515 // cmpxchg.trystore:
1516 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.fencedstore],
1517 // [%releasedload, %cmpxchg.releasedload]
1518 // %updated.new = insert %new into %loaded.trystore
1519 // %stored = @store_conditional(%updated.new, %aligned.addr)
1520 // %success = icmp eq i32 %stored, 0
1521 // br i1 %success, label %cmpxchg.success,
1522 // label %cmpxchg.releasedload/%cmpxchg.failure
1523 // cmpxchg.releasedload:
1524 // %releasedload = @load.linked(%aligned.addr)
1525 // %releasedload.extract = extract value from %releasedload
1526 // %should_store = icmp eq %releasedload.extract, %desired
1527 // br i1 %should_store, label %cmpxchg.trystore,
1528 // label %cmpxchg.failure
1529 // cmpxchg.success:
1530 // fence?
1531 // br label %cmpxchg.end
1532 // cmpxchg.nostore:
1533 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1534 // [%releasedload,
1535 // %cmpxchg.releasedload/%cmpxchg.trystore]
1536 // @load_linked_fail_balance()?
1537 // br label %cmpxchg.failure
1538 // cmpxchg.failure:
1539 // fence?
1540 // br label %cmpxchg.end
1541 // cmpxchg.end:
1542 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1543 // [%loaded.trystore, %cmpxchg.trystore]
1544 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1545 // %loaded = extract value from %loaded.exit
1546 // %restmp = insertvalue { iN, i1 } poison, iN %loaded, 0
1547 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1548 // [...]
 // Each block is created in front of the previous one, so the final layout
 // runs from cmpxchg.start down to cmpxchg.end.
1549 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1550 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1551 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1552 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1553 auto ReleasedLoadBB =
1554 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1555 auto TryStoreBB =
1556 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1557 auto ReleasingStoreBB =
1558 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1559 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1560
1561 ReplacementIRBuilder Builder(CI, *DL);
1562
1563 // The split call above "helpfully" added a branch at the end of BB (to the
1564 // wrong place), but we might want a fence too. It's easiest to just remove
1565 // the branch entirely.
1566 std::prev(BB->end())->eraseFromParent();
1567 Builder.SetInsertPoint(BB);
1568 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1569 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1570
1571 PartwordMaskValues PMV =
1572 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1573 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1574 Builder.CreateBr(StartBB);
1575
1576 // Start the main loop block now that we've taken care of the preliminaries.
1577 Builder.SetInsertPoint(StartBB);
1578 Value *UnreleasedLoad =
1579 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1580 Value *UnreleasedLoadExtract =
1581 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1582 Value *ShouldStore = Builder.CreateICmpEQ(
1583 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1584
1585 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1586 // jump straight past that fence instruction (if it exists).
1587 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
1588 MDBuilder(F->getContext()).createLikelyBranchWeights());
1589
1590 Builder.SetInsertPoint(ReleasingStoreBB);
1591 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1592 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1593 Builder.CreateBr(TryStoreBB);
1594
1595 Builder.SetInsertPoint(TryStoreBB);
1596 PHINode *LoadedTryStore =
1597 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1598 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1599 Value *NewValueInsert =
1600 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1601 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1602 PMV.AlignedAddr, MemOpOrder);
1603 StoreSuccess = Builder.CreateICmpEQ(
1604 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
 // A strong cmpxchg retries after a failed store (from the released-load
 // block when it exists, otherwise from the start); a weak one fails.
1605 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1606 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1607 CI->isWeak() ? FailureBB : RetryBB,
1608 MDBuilder(F->getContext()).createLikelyBranchWeights());
1609
1610 Builder.SetInsertPoint(ReleasedLoadBB);
 // SecondLoad is only assigned, and only read, when HasReleasedLoadBB holds.
1611 Value *SecondLoad;
1612 if (HasReleasedLoadBB) {
1613 SecondLoad =
1614 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1615 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1616 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1617 CI->getCompareOperand(), "should_store");
1618
1619 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1620 // jump straight past that fence instruction (if it exists).
1621 Builder.CreateCondBr(
1622 ShouldStore, TryStoreBB, NoStoreBB,
1623 MDBuilder(F->getContext()).createLikelyBranchWeights());
1624 // Update PHI node in TryStoreBB.
1625 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1626 } else
 // No branch targets this block in that configuration; it only needs a
 // terminator.
1627 Builder.CreateUnreachable();
1628
1629 // Make sure later instructions don't get reordered with a fence if
1630 // necessary.
1631 Builder.SetInsertPoint(SuccessBB);
1632 if (ShouldInsertFencesForAtomic ||
1634 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1635 Builder.CreateBr(ExitBB);
1636
1637 Builder.SetInsertPoint(NoStoreBB);
1638 PHINode *LoadedNoStore =
1639 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1640 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1641 if (HasReleasedLoadBB)
1642 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1643
1644 // In the failing case, where we don't execute the store-conditional, the
1645 // target might want to balance out the load-linked with a dedicated
1646 // instruction (e.g., on ARM, clearing the exclusive monitor).
1648 Builder.CreateBr(FailureBB);
1649
1650 Builder.SetInsertPoint(FailureBB);
1651 PHINode *LoadedFailure =
1652 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1653 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
 // Only a weak cmpxchg branches from the try-store block straight to failure.
1654 if (CI->isWeak())
1655 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1656 if (ShouldInsertFencesForAtomic)
1657 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1658 Builder.CreateBr(ExitBB);
1659
1660 // Finally, we have control-flow based knowledge of whether the cmpxchg
1661 // succeeded or not. We expose this to later passes by converting any
1662 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1663 // PHI.
1664 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1665 PHINode *LoadedExit =
1666 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1667 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1668 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1669 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1670 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1671 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1672
1673 // This is the "exit value" from the cmpxchg expansion. It may be of
1674 // a type wider than the one in the cmpxchg instruction.
1675 Value *LoadedFull = LoadedExit;
1676
 // Continue emitting right after the PHIs of the exit block.
1677 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1678 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1679
1680 // Look for any users of the cmpxchg that are just comparing the loaded value
1681 // against the desired one, and replace them with the CFG-derived version.
1683 for (auto *User : CI->users()) {
1684 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1685 if (!EV)
1686 continue;
1687
1688 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1689 "weird extraction from { iN, i1 }");
1690
1691 if (EV->getIndices()[0] == 0)
1692 EV->replaceAllUsesWith(Loaded);
1693 else
1695
 // Erasure is deferred so the user list is not modified while iterating.
1696 PrunedInsts.push_back(EV);
1697 }
1698
1699 // We can remove the instructions now we're no longer iterating through them.
1700 for (auto *EV : PrunedInsts)
1701 EV->eraseFromParent();
1702
1703 if (!CI->use_empty()) {
1704 // Some use of the full struct return that we don't understand has happened,
1705 // so we've got to reconstruct it properly.
1706 Value *Res;
1707 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1708 Res = Builder.CreateInsertValue(Res, Success, 1);
1709
1710 CI->replaceAllUsesWith(Res);
1711 }
1712
1713 CI->eraseFromParent();
1714 return true;
1715}
1716
1717bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1718 if (RMWI->isVolatile())
1719 return false;
1720 // TODO: Add floating point support.
1721 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1722 if (!C)
1723 return false;
1724
1725 switch (RMWI->getOperation()) {
1726 case AtomicRMWInst::Add:
1727 case AtomicRMWInst::Sub:
1728 case AtomicRMWInst::Or:
1729 case AtomicRMWInst::Xor:
1730 return C->isZero();
1731 case AtomicRMWInst::And:
1732 return C->isMinusOne();
1733 case AtomicRMWInst::Min:
1734 return C->isMaxValue(true);
1735 case AtomicRMWInst::Max:
1736 return C->isMinValue(true);
1738 return C->isMaxValue(false);
1740 return C->isMinValue(false);
1741 default:
1742 return false;
1743 }
1744}
1745
1746bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1747 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1748 tryExpandAtomicLoad(ResultingLoad);
1749 return true;
1750 }
1751 return false;
1752}
1753
/// Emit a compare-exchange retry loop at Builder's current insertion point.
///
/// The current block is split into the code before the loop, a new
/// "atomicrmw.start" loop block, and an "atomicrmw.end" exit block. The value
/// at \p Addr is loaded once before the loop; each iteration calls
/// \p PerformOp on the currently loaded value to compute the value to store
/// and \p CreateCmpXchg to emit the compare-exchange that attempts the store.
/// \p CreateCmpXchg must set both of its Success and NewLoaded out-parameters.
///
/// An Unordered \p MemOpOrder is passed to \p CreateCmpXchg as Monotonic.
/// On return, Builder is positioned at the start of the exit block.
///
/// \returns the NewLoaded value produced by \p CreateCmpXchg.
1754Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1755 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1756 AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile,
1757 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1758 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1759 LLVMContext &Ctx = Builder.getContext();
1760 BasicBlock *BB = Builder.GetInsertBlock();
1761 Function *F = BB->getParent();
1762
1763 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1764 //
1765 // The standard expansion we produce is:
1766 // [...]
1767 // %init_loaded = load atomic iN* %addr
1768 // br label %loop
1769 // loop:
1770 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1771 // %new = some_op iN %loaded, %incr
1772 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1773 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1774 // %success = extractvalue { iN, i1 } %pair, 1
1775 // br i1 %success, label %atomicrmw.end, label %loop
1776 // atomicrmw.end:
1777 // [...]
1778 BasicBlock *ExitBB =
1779 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1780 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1781
1782 // The split call above "helpfully" added a branch at the end of BB (to the
1783 // wrong place), but we want a load. It's easiest to just remove
1784 // the branch entirely.
1785 std::prev(BB->end())->eraseFromParent();
1786 Builder.SetInsertPoint(BB);
1787 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1788 Builder.CreateBr(LoopBB);
1789
1790 // Start the main loop block now that we've taken care of the preliminaries.
1791 Builder.SetInsertPoint(LoopBB);
1792 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1793 Loaded->addIncoming(InitLoaded, BB);
1794
1795 // The initial load must be atomic with the same synchronization scope
1796 // to avoid a data race with concurrent stores. If the instruction being
1797 // emulated is volatile, issue a volatile load.
1798 // addIncoming is done first so that any replaceAllUsesWith calls during
1799 // normalization correctly update the PHI incoming value.
1800 InitLoaded->setVolatile(IsVolatile);
// NOTE(review): the embedded listing numbering skips 1801 here, and the '}'
// at 1807 below has no visible opener. The statement that opens this scope
// (presumably a guard around making the initial load atomic) appears to be
// missing from this copy - confirm against the upstream source.
1802 InitLoaded->setAtomic(AtomicOrdering::Monotonic, SSID);
1803 // The newly created load might need to be lowered further. Because it is
1804 // created in the same block as the atomicrmw, the AtomicExpand loop will
1805 // not process it again.
1806 processAtomicInstr(InitLoaded);
1807 }
1808
1809 Value *NewVal = PerformOp(Builder, Loaded);
1810
1811 Value *NewLoaded = nullptr;
1812 Value *Success = nullptr;
1813
// An Unordered request is passed on as Monotonic; every other ordering is
// forwarded unchanged.
1814 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1815 MemOpOrder == AtomicOrdering::Unordered
1816 ? AtomicOrdering::Monotonic
1817 : MemOpOrder,
1818 SSID, IsVolatile, Success, NewLoaded, MetadataSrc);
1819 assert(Success && NewLoaded);
1820
// Back edge: a failed exchange feeds the freshly observed value into the PHI.
1821 Loaded->addIncoming(NewLoaded, LoopBB);
1822
1823 Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);
1824
1825 // Atomic RMW expands to a cmpxchg loop. Since precise branch weights
1826 // cannot be easily determined here, we mark the branch as "unknown" (50/50)
1827 // to prevent misleading optimizations.
// NOTE(review): the embedded listing numbering skips 1828; the statement that
// applies the "unknown" weights to CondBr is not visible in this copy -
// confirm against the upstream source.
1829
1830 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1831 return NewLoaded;
1832}
1833
1834bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1835 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1836 unsigned ValueSize = getAtomicOpSize(CI);
1837
1838 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1839 default:
1840 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1841 case TargetLoweringBase::AtomicExpansionKind::None:
1842 if (ValueSize < MinCASSize)
1843 return expandPartwordCmpXchg(CI);
1844 return false;
1845 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1846 return expandAtomicCmpXchg(CI);
1847 }
1848 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1849 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1850 return true;
1851 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1852 return lowerAtomicCmpXchgInst(CI);
1853 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1854 TLI->emitExpandAtomicCmpXchg(CI);
1855 return true;
1856 }
1857 }
1858}
1859
1860bool AtomicExpandImpl::expandAtomicRMWToCmpXchg(
1861 AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) {
1862 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1863 Builder.setIsFPConstrained(
1864 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1865
1866 // FIXME: If FP exceptions are observable, we should force them off for the
1867 // loop for the FP atomics.
1868 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1869 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1870 AI->getOrdering(), AI->getSyncScopeID(), AI->isVolatile(),
1871 [&](IRBuilderBase &Builder, Value *Loaded) {
1872 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1873 AI->getValOperand());
1874 },
1875 CreateCmpXchg, /*MetadataSrc=*/AI);
1876
1877 AI->replaceAllUsesWith(Loaded);
1878 AI->eraseFromParent();
1879 return true;
1880}
1881
1882// In order to use one of the sized library calls such as
1883// __atomic_fetch_add_4, the alignment must be sufficient, the size
1884// must be one of the potentially-specialized sizes, and the value
1885// type must actually exist in C on the target (otherwise, the
1886// function wouldn't actually be defined.)
1887static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1888 const DataLayout &DL) {
1889 // TODO: "LargestSize" is an approximation for "largest type that
1890 // you can express in C". It seems to be the case that int128 is
1891 // supported on all 64-bit platforms, otherwise only up to 64-bit
1892 // integers are supported. If we get this wrong, then we'll try to
1893 // call a sized libcall that doesn't actually exist. There should
1894 // really be some more reliable way in LLVM of determining integer
1895 // sizes which are valid in the target's C ABI...
1896 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1897 return Alignment >= Size &&
1898 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1899 Size <= LargestSize;
1900}
1901
1902void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1903 static const RTLIB::Libcall Libcalls[6] = {
1904 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1905 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1906 unsigned Size = getAtomicOpSize(I);
1907
1908 bool Expanded = expandAtomicOpToLibcall(
1909 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1910 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1911 if (!Expanded)
1912 handleUnsupportedAtomicSize(I, "atomic load");
1913}
1914
1915void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1916 static const RTLIB::Libcall Libcalls[6] = {
1917 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1918 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1919 unsigned Size = getAtomicOpSize(I);
1920
1921 bool Expanded = expandAtomicOpToLibcall(
1922 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1923 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1924 if (!Expanded)
1925 handleUnsupportedAtomicSize(I, "atomic store");
1926}
1927
1928void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
1929 const Twine &AtomicOpName,
1930 Instruction *DiagnosticInst) {
1931 static const RTLIB::Libcall Libcalls[6] = {
1932 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1933 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1934 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1935 unsigned Size = getAtomicOpSize(I);
1936
1937 bool Expanded = expandAtomicOpToLibcall(
1938 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1939 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1940 Libcalls);
1941 if (!Expanded)
1942 handleUnsupportedAtomicSize(I, AtomicOpName, DiagnosticInst);
1943}
1944
1946 static const RTLIB::Libcall LibcallsXchg[6] = {
1947 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1948 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1949 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1950 static const RTLIB::Libcall LibcallsAdd[6] = {
1951 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1952 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1953 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1954 static const RTLIB::Libcall LibcallsSub[6] = {
1955 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1956 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1957 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1958 static const RTLIB::Libcall LibcallsAnd[6] = {
1959 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1960 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1961 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1962 static const RTLIB::Libcall LibcallsOr[6] = {
1963 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1964 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1965 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1966 static const RTLIB::Libcall LibcallsXor[6] = {
1967 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1968 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1969 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1970 static const RTLIB::Libcall LibcallsNand[6] = {
1971 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1972 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1973 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1974
1975 switch (Op) {
1977 llvm_unreachable("Should not have BAD_BINOP.");
1979 return ArrayRef(LibcallsXchg);
1980 case AtomicRMWInst::Add:
1981 return ArrayRef(LibcallsAdd);
1982 case AtomicRMWInst::Sub:
1983 return ArrayRef(LibcallsSub);
1984 case AtomicRMWInst::And:
1985 return ArrayRef(LibcallsAnd);
1986 case AtomicRMWInst::Or:
1987 return ArrayRef(LibcallsOr);
1988 case AtomicRMWInst::Xor:
1989 return ArrayRef(LibcallsXor);
1991 return ArrayRef(LibcallsNand);
1992 case AtomicRMWInst::Max:
1993 case AtomicRMWInst::Min:
2008 // No atomic libcalls are available for these.
2009 return {};
2010 }
2011 llvm_unreachable("Unexpected AtomicRMW operation.");
2012}
2013
2014void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
2015 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
2016
2017 unsigned Size = getAtomicOpSize(I);
2018
2019 bool Success = false;
2020 if (!Libcalls.empty())
2021 Success = expandAtomicOpToLibcall(
2022 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
2023 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
2024
2025 // The expansion failed: either there were no libcalls at all for
2026 // the operation (min/max), or there were only size-specialized
2027 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
2028 // CAS libcall, via a CAS loop, instead.
2029 if (!Success) {
2030 expandAtomicRMWToCmpXchg(
2031 I, [this, I](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
2032 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
2033 SyncScope::ID SSID, bool IsVolatile, Value *&Success,
2034 Value *&NewLoaded, Instruction *MetadataSrc) {
2035 // Create the CAS instruction normally...
2036 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
2037 Addr, Loaded, NewVal, Alignment, MemOpOrder,
2039 Pair->setVolatile(IsVolatile);
2040 if (MetadataSrc)
2041 copyMetadataForAtomic(*Pair, *MetadataSrc);
2042
2043 Success = Builder.CreateExtractValue(Pair, 1, "success");
2044 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
2045
2046 // ...and then expand the CAS into a libcall.
2047 expandAtomicCASToLibcall(
2048 Pair,
2049 "atomicrmw " + AtomicRMWInst::getOperationName(I->getOperation()),
2050 MetadataSrc);
2051 });
2052 }
2053}
2054
2055// A helper routine for the above expandAtomic*ToLibcall functions.
2056//
2057// 'Libcalls' contains an array of enum values for the particular
2058// ATOMIC libcalls to be emitted. All of the other arguments besides
2059// 'I' are extracted from the Instruction subclass by the
2060// caller. Depending on the particular call, some will be null.
//
// 'Libcalls' must have exactly six entries: entry 0 is the generic call (or
// RTLIB::UNKNOWN_LIBCALL if there is none) and entries 1-5 are the variants
// for sizes 1, 2, 4, 8 and 16.
//
// Returns false, leaving 'I' untouched, when no usable libcall exists: either
// a sized call cannot be used and there is no generic one, or the selected
// libcall is reported as RTLIB::Unsupported. Otherwise the call is emitted
// before 'I', 'I' is erased, and true is returned.
2061bool AtomicExpandImpl::expandAtomicOpToLibcall(
2062 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
2063 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
2064 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
2065 assert(Libcalls.size() == 6);
2066
2067 LLVMContext &Ctx = I->getContext();
2068 Module *M = I->getModule();
2069 const DataLayout &DL = M->getDataLayout();
// Code is emitted immediately before I; temporaries are allocated at the
// very start of the function's entry block.
2070 IRBuilder<> Builder(I);
2071 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
2072
2073 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
2074 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
2075
2076 if (M->getTargetTriple().isOSWindows() && M->getTargetTriple().isX86_64() &&
2077 Size == 16) {
2078 // x86_64 Windows passes i128 as an XMM vector; on return, it is in
2079 // XMM0, and as a parameter, it is passed indirectly. The generic lowering
2080 // rules handles this correctly if we pass it as a v2i64 rather than
2081 // i128. This is what Clang does in the frontend for such types as well
2082 // (see WinX86_64ABIInfo::classify in Clang).
2083 SizedIntTy = FixedVectorType::get(Type::getInt64Ty(Ctx), 2);
2084 }
2085
2086 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
2087
2088 // TODO: the "order" argument type is "int", not int32. So
2089 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
2090 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
2091 Constant *OrderingVal =
2092 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
// The second ordering is only materialized for compare-exchange.
2093 Constant *Ordering2Val = nullptr;
2094 if (CASExpected) {
2095 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
2096 Ordering2Val =
2097 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
2098 }
2099 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
2100
// Select the libcall: the size-specific entry when a sized call is usable,
// otherwise the generic entry if the operation has one.
2101 RTLIB::Libcall RTLibType;
2102 if (UseSizedLibcall) {
2103 switch (Size) {
2104 case 1:
2105 RTLibType = Libcalls[1];
2106 break;
2107 case 2:
2108 RTLibType = Libcalls[2];
2109 break;
2110 case 4:
2111 RTLibType = Libcalls[3];
2112 break;
2113 case 8:
2114 RTLibType = Libcalls[4];
2115 break;
2116 case 16:
2117 RTLibType = Libcalls[5];
2118 break;
2119 }
2120 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
2121 RTLibType = Libcalls[0];
2122 } else {
2123 // Can't use sized function, and there's no generic for this
2124 // operation, so give up.
2125 return false;
2126 }
2127
2128 RTLIB::LibcallImpl LibcallImpl = LibcallLowering->getLibcallImpl(RTLibType);
2129 if (LibcallImpl == RTLIB::Unsupported) {
2130 // This target does not implement the requested atomic libcall so give up.
2131 return false;
2132 }
2133
2134 // Build up the function call. There's two kinds. First, the sized
2135 // variants. These calls are going to be one of the following (with
2136 // N=1,2,4,8,16):
2137 // iN __atomic_load_N(iN *ptr, int ordering)
2138 // void __atomic_store_N(iN *ptr, iN val, int ordering)
2139 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
2140 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
2141 // int success_order, int failure_order)
2142 //
2143 // Note that these functions can be used for non-integer atomic
2144 // operations, the values just need to be bitcast to integers on the
2145 // way in and out.
2146 //
2147 // And, then, the generic variants. They look like the following:
2148 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
2149 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
2150 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
2151 // int ordering)
2152 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
2153 // void *desired, int success_order,
2154 // int failure_order)
2155 //
2156 // The different signatures are built up depending on the
2157 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
2158 // variables.
2159
2160 AllocaInst *AllocaCASExpected = nullptr;
2161 AllocaInst *AllocaValue = nullptr;
2162 AllocaInst *AllocaResult = nullptr;
2163
2164 Type *ResultTy;
// NOTE(review): the embedded listing numbering skips 2165; the declaration of
// 'Args' (the call argument list filled in below) is not visible in this
// copy - confirm against the upstream source.
2166 AttributeList Attr;
2167
2168 // 'size' argument.
2169 if (!UseSizedLibcall) {
2170 // Note, getIntPtrType is assumed equivalent to size_t.
2171 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
2172 }
2173
2174 // 'ptr' argument.
2175 // note: This assumes all address spaces share a common libfunc
2176 // implementation and that addresses are convertible. For systems without
2177 // that property, we'd need to extend this mechanism to support AS-specific
2178 // families of atomic intrinsics.
2179 Value *PtrVal = PointerOperand;
2180 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
2181 Args.push_back(PtrVal);
2182
2183 // 'expected' argument, if present.
// It is always passed through a stack slot, for sized and generic calls alike.
2184 if (CASExpected) {
2185 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
2186 AllocaCASExpected->setAlignment(AllocaAlignment);
2187 Builder.CreateLifetimeStart(AllocaCASExpected);
2188 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
2189 Args.push_back(AllocaCASExpected);
2190 }
2191
2192 // 'val' argument ('desired' for cas), if present.
// Sized calls take it by value as SizedIntTy; generic calls take a pointer
// to a stack slot holding it.
2193 if (ValueOperand) {
2194 if (UseSizedLibcall) {
2195 Value *IntValue =
2196 Builder.CreateBitPreservingCastChain(DL, ValueOperand, SizedIntTy);
2197 Args.push_back(IntValue);
2198 } else {
2199 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
2200 AllocaValue->setAlignment(AllocaAlignment);
2201 Builder.CreateLifetimeStart(AllocaValue);
2202 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
2203 Args.push_back(AllocaValue);
2204 }
2205 }
2206
2207 // 'ret' argument.
// Only generic, non-CAS calls that produce a value return it through memory.
2208 if (!CASExpected && HasResult && !UseSizedLibcall) {
2209 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
2210 AllocaResult->setAlignment(AllocaAlignment);
2211 Builder.CreateLifetimeStart(AllocaResult);
2212 Args.push_back(AllocaResult);
2213 }
2214
2215 // 'ordering' ('success_order' for cas) argument.
2216 Args.push_back(OrderingVal);
2217
2218 // 'failure_order' argument, if present.
2219 if (Ordering2Val)
2220 Args.push_back(Ordering2Val);
2221
2222 // Now, the return type.
2223 if (CASExpected) {
2224 ResultTy = Type::getInt1Ty(Ctx);
2225 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2226 } else if (HasResult && UseSizedLibcall)
2227 ResultTy = SizedIntTy;
2228 else
2229 ResultTy = Type::getVoidTy(Ctx);
2230
2231 // Done with setting up arguments and return types, create the call:
// NOTE(review): the embedded listing numbering skips 2232; the declaration of
// 'ArgTys' is not visible in this copy - confirm against the upstream source.
2233 for (Value *Arg : Args)
2234 ArgTys.push_back(Arg->getType());
2235 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2236 FunctionCallee LibcallFn = M->getOrInsertFunction(
// NOTE(review): the embedded listing numbering skips 2237; the leading
// arguments of this call (presumably the libcall's name and FnType) are not
// visible in this copy - confirm against the upstream source.
2238 Attr);
2239 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2240 Call->setAttributes(Attr);
2241 Value *Result = Call;
2242
2243 // And then, extract the results...
2244 if (ValueOperand && !UseSizedLibcall)
2245 Builder.CreateLifetimeEnd(AllocaValue);
2246
2247 if (CASExpected) {
2248 // The final result from the CAS is {load of 'expected' alloca, bool result
2249 // from call}
2250 Type *FinalResultTy = I->getType();
2251 Value *V = PoisonValue::get(FinalResultTy);
2252 Value *ExpectedOut = Builder.CreateAlignedLoad(
2253 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2254 Builder.CreateLifetimeEnd(AllocaCASExpected);
2255 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2256 V = Builder.CreateInsertValue(V, Result, 1);
// NOTE(review): the embedded listing numbering skips 2257; no statement that
// redirects the uses of I to V is visible in this branch, unlike the
// HasResult branch below - confirm against the upstream source.
2258 } else if (HasResult) {
2259 Value *V;
2260 if (UseSizedLibcall) {
2261 // Add bitcasts from Result's scalar type to I's <n x ptr> vector type
2262 auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
2263 auto *VTy = dyn_cast<VectorType>(I->getType());
2264 if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
2265 unsigned AS = PtrTy->getAddressSpace();
2266 Value *BC = Builder.CreateBitCast(
2267 Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
2268 V = Builder.CreateIntToPtr(BC, I->getType());
2269 } else
2270 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2271 } else {
// Generic call: the result was written to the 'ret' stack slot.
2272 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2273 AllocaAlignment);
2274 Builder.CreateLifetimeEnd(AllocaResult);
2275 }
2276 I->replaceAllUsesWith(V);
2277 }
2278 I->eraseFromParent();
2279 return true;
2280}
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile, Value *&Success, Value *&NewLoaded, Instruction *MetadataSrc)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
Expand Atomic static false unsigned getAtomicOpSize(LoadInst *LI)
static void writeUnsupportedAtomicSizeReason(const TargetLowering *TLI, Inst *I, raw_ostream &OS)
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void copyMetadataForAtomic(Instruction &Dest, const Instruction &Source)
Copy metadata that's safe to preserve when widening atomics.
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
static bool isIdempotentRMW(AtomicRMWInst &RMWI)
Return true if and only if the given instruction does not modify the memory location referenced.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
#define T
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
This file defines the SmallString class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
void setAlignment(Align Align)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
void setWeak(bool IsWeak)
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ FSub
*p = old - v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static LLVM_ABI StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
reverse_iterator rbegin()
Definition BasicBlock.h:477
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
reverse_iterator rend()
Definition BasicBlock.h:479
void setAttributes(AttributeList A)
Set the attributes for this call.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ArrayRef< unsigned > getIndices() const
unsigned getNumIndices() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:869
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
BasicBlockListType::iterator iterator
Definition Function.h:70
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:724
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:2009
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2723
LLVM_ABI CallInst * CreateLifetimeStart(Value *Ptr)
Create a lifetime.start intrinsic.
LLVM_ABI CallInst * CreateLifetimeEnd(Value *Ptr)
Create a lifetime.end intrinsic.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1975
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1268
UnreachableInst * CreateUnreachable()
Definition IRBuilder.h:1410
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2716
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2279
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition IRBuilder.h:2318
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
LLVM_ABI Value * CreateBitPreservingCastChain(const DataLayout &DL, Value *V, Type *NewTy)
Create a chain of casts to convert V to NewTy, preserving the bit pattern of V.
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2420
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1262
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2366
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2581
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2416
void setIsFPConstrained(bool IsCon)
Enable/Disable use of constrained floating point math.
Definition IRBuilder.h:358
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2284
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1958
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1563
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2162
LLVMContext & getContext() const
Definition IRBuilder.h:203
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1622
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2274
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2595
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition IRBuilder.h:1994
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1644
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2289
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, bool Elementwise=false)
Definition IRBuilder.h:2022
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2900
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
LLVM_ABI void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
Tracks which library functions to use for a particular subtarget.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
Record a mapping from subtarget to LibcallLoweringInfo.
const LibcallLoweringInfo & getLibcallLowering(const TargetSubtargetInfo &Subtarget) const
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Metadata node.
Definition Metadata.h:1069
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
LLVMContext & getContext() const
Get the global data context.
Definition Module.h:287
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
virtual Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const
Perform a store-conditional operation to Addr.
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform a bit test atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
virtual bool shouldInsertFencesForAtomic(const Instruction *I) const
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
virtual AtomicOrdering atomicOperationOrderAfterFenceSplit(const Instruction *I) const
virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const
Perform a cmpxchg expansion using a target-specific method.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const
Perform a masked atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const
Perform an atomicrmw expansion in a target-specific way.
virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const
virtual void emitExpandAtomicStore(StoreInst *SI) const
Perform an atomic store in a target-specific way.
virtual AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const
Returns how the given atomic atomicrmw should be cast by the IR-level AtomicExpand pass.
virtual bool shouldInsertTrailingSeqCstFenceForAtomicStore(const Instruction *I) const
Whether AtomicExpandPass should automatically insert a seq_cst trailing fence without reducing the or...
virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const
Perform a masked cmpxchg using a target-specific intrinsic.
virtual bool shouldIssueAtomicLoadForAtomicEmulationLoop(void) const
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual void emitExpandAtomicLoad(LoadInst *LI) const
Perform an atomic load in a target-specific way.
virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass.
virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform an atomicrmw whose result is only used by comparison, using a target-specific intrinsic.
virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
Inserts in the IR a target-specific intrinsic specifying a fence.
virtual LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const
On some platforms, an AtomicRMW that never actually modifies the value (such as fetch_add of 0) can b...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool canInstructionHaveMMRAs(const Instruction &I)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
AtomicOrdering
Atomic ordering for LLVM's memory model.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
LLVM_ABI bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls,...
LLVM_ABI char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:435
Matching combinators.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.