LLVM 23.0.0git
AtomicExpandPass.cpp
Go to the documentation of this file.
1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target specific instruction which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/Instruction.h"
38#include "llvm/IR/MDBuilder.h"
40#include "llvm/IR/Module.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
46#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const LibcallLoweringInfo *LibcallLowering = nullptr;
67 const DataLayout *DL = nullptr;
68
69private:
70 /// Callback type for emitting a cmpxchg instruction during RMW expansion.
71 /// Parameters: (Builder, Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
72 /// SSID, IsVolatile, /* OUT */ Success, /* OUT */ NewLoaded,
73 /// MetadataSrc)
74 using CreateCmpXchgInstFun = function_ref<void(
77
78 void handleFailure(Instruction &FailedInst, const Twine &Msg,
79 Instruction *DiagnosticInst = nullptr) const {
80 LLVMContext &Ctx = FailedInst.getContext();
81
82 // TODO: Do not use generic error type.
83 Ctx.emitError(DiagnosticInst ? DiagnosticInst : &FailedInst, Msg);
84
85 if (!FailedInst.getType()->isVoidTy())
86 FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
87 FailedInst.eraseFromParent();
88 }
89
90 template <typename Inst>
91 void handleUnsupportedAtomicSize(Inst *I, const Twine &AtomicOpName,
92 Instruction *DiagnosticInst = nullptr) const;
93
94 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
95 bool tryInsertTrailingSeqCstFence(Instruction *AtomicI);
96 template <typename AtomicInst>
97 bool tryInsertFencesForAtomic(AtomicInst *AtomicI, bool OrderingRequiresFence,
98 AtomicOrdering NewOrdering);
99 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
100 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
101 bool tryExpandAtomicLoad(LoadInst *LI);
102 bool expandAtomicLoadToLL(LoadInst *LI);
103 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
104 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
105 bool tryExpandAtomicStore(StoreInst *SI);
106 void expandAtomicStoreToXChg(StoreInst *SI);
107 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
108 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
109 Value *
110 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
111 Align AddrAlign, AtomicOrdering MemOpOrder,
112 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
113 void expandAtomicOpToLLSC(
114 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
115 AtomicOrdering MemOpOrder,
116 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
117 void expandPartwordAtomicRMW(
119 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
120 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
121 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
122 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
123
124 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
125 static Value *insertRMWCmpXchgLoop(
126 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
127 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
128 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
129 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
130 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
131
132 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
133 bool isIdempotentRMW(AtomicRMWInst *RMWI);
134 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
135
136 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
137 Value *PointerOperand, Value *ValueOperand,
138 Value *CASExpected, AtomicOrdering Ordering,
139 AtomicOrdering Ordering2,
140 ArrayRef<RTLIB::Libcall> Libcalls);
141 void expandAtomicLoadToLibcall(LoadInst *LI);
142 void expandAtomicStoreToLibcall(StoreInst *LI);
143 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
144 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
145 const Twine &AtomicOpName = "cmpxchg",
146 Instruction *DiagnosticInst = nullptr);
147
148 bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
149 CreateCmpXchgInstFun CreateCmpXchg);
150
151 bool processAtomicInstr(Instruction *I);
152
153public:
154 bool run(Function &F,
155 const LibcallLoweringModuleAnalysisResult &LibcallResult,
156 const TargetMachine *TM);
157};
158
159class AtomicExpandLegacy : public FunctionPass {
160public:
161 static char ID; // Pass identification, replacement for typeid
162
163 AtomicExpandLegacy() : FunctionPass(ID) {}
164
165 void getAnalysisUsage(AnalysisUsage &AU) const override {
168 }
169
170 bool runOnFunction(Function &F) override;
171};
172
173// IRBuilder to be used for replacement atomic instructions.
174struct ReplacementIRBuilder
175 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
176 MDNode *MMRAMD = nullptr;
177
178 // Preserves the DebugLoc from I, and preserves still valid metadata.
179 // Enable StrictFP builder mode when appropriate.
180 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
181 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
183 [this](Instruction *I) { addMMRAMD(I); })) {
184 SetInsertPoint(I);
185 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
186 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
187 this->setIsFPConstrained(true);
188
189 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
190 }
191
192 void addMMRAMD(Instruction *I) {
194 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
195 }
196};
197
198} // end anonymous namespace
199
200char AtomicExpandLegacy::ID = 0;
201
202char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
203
205 "Expand Atomic instructions", false, false)
208INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
209 "Expand Atomic instructions", false, false)
210
211// Helper functions to retrieve the size of atomic instructions.
212static unsigned getAtomicOpSize(LoadInst *LI) {
213 const DataLayout &DL = LI->getDataLayout();
214 return DL.getTypeStoreSize(LI->getType());
215}
216
217static unsigned getAtomicOpSize(StoreInst *SI) {
218 const DataLayout &DL = SI->getDataLayout();
219 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
220}
221
222static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
223 const DataLayout &DL = RMWI->getDataLayout();
224 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
225}
226
227static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
228 const DataLayout &DL = CASI->getDataLayout();
229 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
230}
231
232/// Copy metadata that's safe to preserve when widening atomics.
234 const Instruction &Source) {
236 Source.getAllMetadata(MD);
237 LLVMContext &Ctx = Dest.getContext();
238 MDBuilder MDB(Ctx);
239
240 for (auto [ID, N] : MD) {
241 switch (ID) {
242 case LLVMContext::MD_dbg:
243 case LLVMContext::MD_tbaa:
244 case LLVMContext::MD_tbaa_struct:
245 case LLVMContext::MD_alias_scope:
246 case LLVMContext::MD_noalias:
247 case LLVMContext::MD_noalias_addrspace:
248 case LLVMContext::MD_access_group:
249 case LLVMContext::MD_mmra:
250 Dest.setMetadata(ID, N);
251 break;
252 default:
253 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
254 Dest.setMetadata(ID, N);
255 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
256 Dest.setMetadata(ID, N);
257
258 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
259 // uses.
260 break;
261 }
262 }
263}
264
265template <typename Inst>
266static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
267 unsigned Size = getAtomicOpSize(I);
268 Align Alignment = I->getAlign();
269 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
270 return Alignment >= Size && Size <= MaxSize;
271}
272
273template <typename Inst>
275 raw_ostream &OS) {
276 unsigned Size = getAtomicOpSize(I);
277 Align Alignment = I->getAlign();
278 bool NeedSeparator = false;
279
280 if (Alignment < Size) {
281 OS << "instruction alignment " << Alignment.value()
282 << " is smaller than the required " << Size
283 << "-byte alignment for this atomic operation";
284 NeedSeparator = true;
285 }
286
287 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
288 if (Size > MaxSize) {
289 if (NeedSeparator)
290 OS << "; ";
291 OS << "target supports atomics up to " << MaxSize
292 << " bytes, but this atomic accesses " << Size << " bytes";
293 }
294}
295
296template <typename Inst>
297void AtomicExpandImpl::handleUnsupportedAtomicSize(
298 Inst *I, const Twine &AtomicOpName, Instruction *DiagnosticInst) const {
299 assert(!atomicSizeSupported(TLI, I) && "expected unsupported atomic size");
300 SmallString<128> FailureReason;
301 raw_svector_ostream OS(FailureReason);
303 handleFailure(*I, Twine("unsupported ") + AtomicOpName + ": " + FailureReason,
304 DiagnosticInst);
305}
306
307bool AtomicExpandImpl::tryInsertTrailingSeqCstFence(Instruction *AtomicI) {
309 return false;
310
311 IRBuilder Builder(AtomicI);
312 if (auto *TrailingFence = TLI->emitTrailingFence(
313 Builder, AtomicI, AtomicOrdering::SequentiallyConsistent)) {
314 TrailingFence->moveAfter(AtomicI);
315 return true;
316 }
317 return false;
318}
319
320template <typename AtomicInst>
321bool AtomicExpandImpl::tryInsertFencesForAtomic(AtomicInst *AtomicI,
322 bool OrderingRequiresFence,
323 AtomicOrdering NewOrdering) {
324 bool ShouldInsertFences = TLI->shouldInsertFencesForAtomic(AtomicI);
325 if (OrderingRequiresFence && ShouldInsertFences) {
326 AtomicOrdering FenceOrdering = AtomicI->getOrdering();
327 AtomicI->setOrdering(NewOrdering);
328 return bracketInstWithFences(AtomicI, FenceOrdering);
329 }
330 if (!ShouldInsertFences)
331 return tryInsertTrailingSeqCstFence(AtomicI);
332 return false;
333}
334
335bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
336 if (auto *LI = dyn_cast<LoadInst>(I)) {
337 if (!LI->isAtomic())
338 return false;
339
340 if (!atomicSizeSupported(TLI, LI)) {
341 expandAtomicLoadToLibcall(LI);
342 return true;
343 }
344
345 bool MadeChange = false;
346 if (TLI->shouldCastAtomicLoadInIR(LI) ==
347 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
348 LI = convertAtomicLoadToIntegerType(LI);
349 MadeChange = true;
350 }
351
352 MadeChange |= tryInsertFencesForAtomic(
353 LI, isAcquireOrStronger(LI->getOrdering()), AtomicOrdering::Monotonic);
354
355 MadeChange |= tryExpandAtomicLoad(LI);
356 return MadeChange;
357 }
358
359 if (auto *SI = dyn_cast<StoreInst>(I)) {
360 if (!SI->isAtomic())
361 return false;
362
363 if (!atomicSizeSupported(TLI, SI)) {
364 expandAtomicStoreToLibcall(SI);
365 return true;
366 }
367
368 bool MadeChange = false;
369 if (TLI->shouldCastAtomicStoreInIR(SI) ==
370 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
371 SI = convertAtomicStoreToIntegerType(SI);
372 MadeChange = true;
373 }
374
375 MadeChange |= tryInsertFencesForAtomic(
376 SI, isReleaseOrStronger(SI->getOrdering()), AtomicOrdering::Monotonic);
377
378 MadeChange |= tryExpandAtomicStore(SI);
379 return MadeChange;
380 }
381
382 if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) {
383 if (!atomicSizeSupported(TLI, RMWI)) {
384 expandAtomicRMWToLibcall(RMWI);
385 return true;
386 }
387
388 bool MadeChange = false;
389 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
390 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
391 RMWI = convertAtomicXchgToIntegerType(RMWI);
392 MadeChange = true;
393 }
394
395 MadeChange |= tryInsertFencesForAtomic(
396 RMWI,
397 isReleaseOrStronger(RMWI->getOrdering()) ||
398 isAcquireOrStronger(RMWI->getOrdering()),
400
401 // There are two different ways of expanding RMW instructions:
402 // - into a load if it is idempotent
403 // - into a Cmpxchg/LL-SC loop otherwise
404 // we try them in that order.
405 MadeChange |= (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
406 tryExpandAtomicRMW(RMWI);
407 return MadeChange;
408 }
409
410 if (auto *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
411 if (!atomicSizeSupported(TLI, CASI)) {
412 expandAtomicCASToLibcall(CASI);
413 return true;
414 }
415
416 // TODO: when we're ready to make the change at the IR level, we can
417 // extend convertCmpXchgToInteger for floating point too.
418 bool MadeChange = false;
419 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
420 // TODO: add a TLI hook to control this so that each target can
421 // convert to lowering the original type one at a time.
422 CASI = convertCmpXchgToIntegerType(CASI);
423 MadeChange = true;
424 }
425
426 auto CmpXchgExpansion = TLI->shouldExpandAtomicCmpXchgInIR(CASI);
427 if (TLI->shouldInsertFencesForAtomic(CASI)) {
428 if (CmpXchgExpansion == TargetLoweringBase::AtomicExpansionKind::None &&
429 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
430 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
431 isAcquireOrStronger(CASI->getFailureOrdering()))) {
432 // If a compare and swap is lowered to LL/SC, we can do smarter fence
433 // insertion, with a stronger one on the success path than on the
434 // failure path. As a result, fence insertion is directly done by
435 // expandAtomicCmpXchg in that case.
436 AtomicOrdering FenceOrdering = CASI->getMergedOrdering();
437 AtomicOrdering CASOrdering =
439 CASI->setSuccessOrdering(CASOrdering);
440 CASI->setFailureOrdering(CASOrdering);
441 MadeChange |= bracketInstWithFences(CASI, FenceOrdering);
442 }
443 } else if (CmpXchgExpansion !=
444 TargetLoweringBase::AtomicExpansionKind::LLSC) {
445 // CmpXchg LLSC is handled in expandAtomicCmpXchg().
446 MadeChange |= tryInsertTrailingSeqCstFence(CASI);
447 }
448
449 MadeChange |= tryExpandAtomicCmpXchg(CASI);
450 return MadeChange;
451 }
452
453 return false;
454}
455
456bool AtomicExpandImpl::run(
457 Function &F, const LibcallLoweringModuleAnalysisResult &LibcallResult,
458 const TargetMachine *TM) {
459 const auto *Subtarget = TM->getSubtargetImpl(F);
460 if (!Subtarget->enableAtomicExpand())
461 return false;
462 TLI = Subtarget->getTargetLowering();
463 LibcallLowering = &LibcallResult.getLibcallLowering(*Subtarget);
464 DL = &F.getDataLayout();
465
466 bool MadeChange = false;
467
468 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
469 BasicBlock *BB = &*BBI;
470
472
473 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
474 I = Next) {
475 Instruction &Inst = *I;
476 Next = std::next(I);
477
478 if (processAtomicInstr(&Inst)) {
479 MadeChange = true;
480
481 // New blocks may have been inserted.
482 BBE = F.end();
483 }
484 }
485 }
486
487 return MadeChange;
488}
489
490bool AtomicExpandLegacy::runOnFunction(Function &F) {
491
492 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
493 if (!TPC)
494 return false;
495 auto *TM = &TPC->getTM<TargetMachine>();
496
497 const LibcallLoweringModuleAnalysisResult &LibcallResult =
498 getAnalysis<LibcallLoweringInfoWrapper>().getResult(*F.getParent());
499 AtomicExpandImpl AE;
500 return AE.run(F, LibcallResult, TM);
501}
502
504 return new AtomicExpandLegacy();
505}
506
509 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
510
511 const LibcallLoweringModuleAnalysisResult *LibcallResult =
512 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
513
514 if (!LibcallResult) {
515 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
516 "' analysis required");
517 return PreservedAnalyses::all();
518 }
519
520 AtomicExpandImpl AE;
521
522 bool Changed = AE.run(F, *LibcallResult, TM);
523 if (!Changed)
524 return PreservedAnalyses::all();
525
527}
528
529bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
530 AtomicOrdering Order) {
531 ReplacementIRBuilder Builder(I, *DL);
532
533 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
534
535 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
536 // We have a guard here because not every atomic operation generates a
537 // trailing fence.
538 if (TrailingFence)
539 TrailingFence->moveAfter(I);
540
541 return (LeadingFence || TrailingFence);
542}
543
544/// Get the iX type with the same bitwidth as T.
546AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
547 EVT VT = TLI->getMemValueType(DL, T);
548 unsigned BitWidth = VT.getStoreSizeInBits();
549 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
550 return IntegerType::get(T->getContext(), BitWidth);
551}
552
553/// Convert an atomic load of a non-integral type to an integer load of the
554/// equivalent bitwidth. See the function comment on
555/// convertAtomicStoreToIntegerType for background.
556LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
557 auto *M = LI->getModule();
558 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
559
560 ReplacementIRBuilder Builder(LI, *DL);
561
562 Value *Addr = LI->getPointerOperand();
563
564 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
565 NewLI->setAlignment(LI->getAlign());
566 NewLI->setVolatile(LI->isVolatile());
567 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
568 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
569
570 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
571 LI->replaceAllUsesWith(NewVal);
572 LI->eraseFromParent();
573 return NewLI;
574}
575
576AtomicRMWInst *
577AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
579
580 auto *M = RMWI->getModule();
581 Type *NewTy =
582 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
583
584 ReplacementIRBuilder Builder(RMWI, *DL);
585
586 Value *Addr = RMWI->getPointerOperand();
587 Value *Val = RMWI->getValOperand();
588 Value *NewVal = Val->getType()->isPointerTy()
589 ? Builder.CreatePtrToInt(Val, NewTy)
590 : Builder.CreateBitCast(Val, NewTy);
591
592 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
593 RMWI->getAlign(), RMWI->getOrdering(),
594 RMWI->getSyncScopeID());
595 NewRMWI->setVolatile(RMWI->isVolatile());
596 copyMetadataForAtomic(*NewRMWI, *RMWI);
597 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
598
599 Value *NewRVal = RMWI->getType()->isPointerTy()
600 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
601 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
602 RMWI->replaceAllUsesWith(NewRVal);
603 RMWI->eraseFromParent();
604 return NewRMWI;
605}
606
/// Expand an atomic load according to the strategy selected by the target.
/// Returns true if the IR was changed.
bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    // Target handles this load natively; leave the IR untouched.
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    // Emit a load-linked/store-conditional loop; the identity PerformOp
    // turns the loop into a pure load.
    expandAtomicOpToLLSC(
        LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
        LI->getOrdering(),
        [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
    return true;
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    // A bare load-linked, without the store-conditional, is sufficient.
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    // Implement the load via a compare-exchange.
    return expandAtomicLoadToCmpXchg(LI);
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    // Demote to a plain, non-atomic load.
    LI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
    // The target supplies its own expansion.
    TLI->emitExpandAtomicLoad(LI);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
  }
}
631
/// Expand an atomic store according to the strategy selected by the target.
/// Returns true if the IR was changed.
bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
  switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    // Target handles this store natively; leave the IR untouched.
    return false;
  case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
    // The target supplies its own expansion.
    TLI->emitExpandAtomicStore(SI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    // Rewrite the store as an atomic xchg whose result is discarded.
    expandAtomicStoreToXChg(SI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    // Demote to a plain, non-atomic store.
    SI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicStore");
  }
}
649
650bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
651 ReplacementIRBuilder Builder(LI, *DL);
652
653 // On some architectures, load-linked instructions are atomic for larger
654 // sizes than normal loads. For example, the only 64-bit load guaranteed
655 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
656 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
657 LI->getPointerOperand(), LI->getOrdering());
659
660 LI->replaceAllUsesWith(Val);
661 LI->eraseFromParent();
662
663 return true;
664}
665
666bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
667 ReplacementIRBuilder Builder(LI, *DL);
668 AtomicOrdering Order = LI->getOrdering();
669 if (Order == AtomicOrdering::Unordered)
670 Order = AtomicOrdering::Monotonic;
671
672 Value *Addr = LI->getPointerOperand();
673 Type *Ty = LI->getType();
674 Constant *DummyVal = Constant::getNullValue(Ty);
675
676 Value *Pair = Builder.CreateAtomicCmpXchg(
677 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
679 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
680
681 LI->replaceAllUsesWith(Loaded);
682 LI->eraseFromParent();
683
684 return true;
685}
686
687/// Convert an atomic store of a non-integral type to an integer store of the
688/// equivalent bitwidth. We used to not support floating point or vector
689/// atomics in the IR at all. The backends learned to deal with the bitcast
690/// idiom because that was the only way of expressing the notion of a atomic
691/// float or vector store. The long term plan is to teach each backend to
692/// instruction select from the original atomic store, but as a migration
693/// mechanism, we convert back to the old format which the backends understand.
694/// Each backend will need individual work to recognize the new format.
695StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
696 ReplacementIRBuilder Builder(SI, *DL);
697 auto *M = SI->getModule();
698 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
699 M->getDataLayout());
700 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
701
702 Value *Addr = SI->getPointerOperand();
703
704 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
705 NewSI->setAlignment(SI->getAlign());
706 NewSI->setVolatile(SI->isVolatile());
707 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
708 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
709 SI->eraseFromParent();
710 return NewSI;
711}
712
713void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
714 // This function is only called on atomic stores that are too large to be
715 // atomic if implemented as a native store. So we replace them by an
716 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
717 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
718 // It is the responsibility of the target to only signal expansion via
719 // shouldExpandAtomicRMW in cases where this is required and possible.
720 ReplacementIRBuilder Builder(SI, *DL);
721 AtomicOrdering Ordering = SI->getOrdering();
722 assert(Ordering != AtomicOrdering::NotAtomic);
723 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
724 ? AtomicOrdering::Monotonic
725 : Ordering;
726 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
727 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
728 SI->getAlign(), RMWOrdering);
729 SI->eraseFromParent();
730
731 // Now we have an appropriate swap instruction, lower it as usual.
732 tryExpandAtomicRMW(AI);
733}
734
735static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
736 Value *Loaded, Value *NewVal, Align AddrAlign,
737 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
738 Value *&Success, Value *&NewLoaded,
739 Instruction *MetadataSrc) {
740 Type *OrigTy = NewVal->getType();
741
742 // This code can go away when cmpxchg supports FP and vector types.
743 assert(!OrigTy->isPointerTy());
744 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
745 if (NeedBitcast) {
746 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
747 NewVal = Builder.CreateBitCast(NewVal, IntTy);
748 Loaded = Builder.CreateBitCast(Loaded, IntTy);
749 }
750
751 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
752 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
754 if (MetadataSrc)
755 copyMetadataForAtomic(*Pair, *MetadataSrc);
756
757 Success = Builder.CreateExtractValue(Pair, 1, "success");
758 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
759
760 if (NeedBitcast)
761 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
762}
763
764bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
765 LLVMContext &Ctx = AI->getModule()->getContext();
766 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
767 switch (Kind) {
768 case TargetLoweringBase::AtomicExpansionKind::None:
769 return false;
770 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
771 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
772 unsigned ValueSize = getAtomicOpSize(AI);
773 if (ValueSize < MinCASSize) {
774 expandPartwordAtomicRMW(AI,
775 TargetLoweringBase::AtomicExpansionKind::LLSC);
776 } else {
777 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
778 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
779 AI->getValOperand());
780 };
781 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
782 AI->getAlign(), AI->getOrdering(), PerformOp);
783 }
784 return true;
785 }
786 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
787 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
788 unsigned ValueSize = getAtomicOpSize(AI);
789 if (ValueSize < MinCASSize) {
790 expandPartwordAtomicRMW(AI,
791 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
792 } else {
794 Ctx.getSyncScopeNames(SSNs);
795 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
796 ? "system"
797 : SSNs[AI->getSyncScopeID()];
798 OptimizationRemarkEmitter ORE(AI->getFunction());
799 ORE.emit([&]() {
800 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
801 << "A compare and swap loop was generated for an atomic "
802 << AI->getOperationName(AI->getOperation()) << " operation at "
803 << MemScope << " memory scope";
804 });
805 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
806 }
807 return true;
808 }
809 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
810 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
811 unsigned ValueSize = getAtomicOpSize(AI);
812 if (ValueSize < MinCASSize) {
814 // Widen And/Or/Xor and give the target another chance at expanding it.
817 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
818 return true;
819 }
820 }
821 expandAtomicRMWToMaskedIntrinsic(AI);
822 return true;
823 }
824 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
826 return true;
827 }
828 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
830 return true;
831 }
832 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
833 return lowerAtomicRMWInst(AI);
834 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
835 TLI->emitExpandAtomicRMW(AI);
836 return true;
837 default:
838 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
839 }
840}
841
842namespace {
843
/// Bundle of values needed to operate on a sub-word value through a wider,
/// word-sized atomic access. Populated by createMaskInstrs.
struct PartwordMaskValues {
  // These three fields are guaranteed to be set by createMaskInstrs.
  // Integer type of the full word the widened operation accesses.
  Type *WordType = nullptr;
  // Original type of the partword value being operated on.
  Type *ValueType = nullptr;
  // Integer type of the same width as ValueType (differs only when
  // ValueType is a floating-point or vector type).
  Type *IntValueType = nullptr;
  // Word-aligned address the widened operation accesses, and its alignment.
  Value *AlignedAddr = nullptr;
  Align AlignedAddrAlignment;
  // The remaining fields can be null.
  // Bit offset of the value within the word (used for lshr/shl).
  Value *ShiftAmt = nullptr;
  // Word-sized mask selecting the value's bits.
  Value *Mask = nullptr;
  // Complement of Mask; clears the value's bits within the word.
  Value *Inv_Mask = nullptr;
};
856
857[[maybe_unused]]
858raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
859 auto PrintObj = [&O](auto *V) {
860 if (V)
861 O << *V;
862 else
863 O << "nullptr";
864 O << '\n';
865 };
866 O << "PartwordMaskValues {\n";
867 O << " WordType: ";
868 PrintObj(PMV.WordType);
869 O << " ValueType: ";
870 PrintObj(PMV.ValueType);
871 O << " AlignedAddr: ";
872 PrintObj(PMV.AlignedAddr);
873 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
874 O << " ShiftAmt: ";
875 PrintObj(PMV.ShiftAmt);
876 O << " Mask: ";
877 PrintObj(PMV.Mask);
878 O << " Inv_Mask: ";
879 PrintObj(PMV.Inv_Mask);
880 O << "}\n";
881 return O;
882}
883
884} // end anonymous namespace
885
886/// This is a helper function which builds instructions to provide
887/// values necessary for partword atomic operations. It takes an
888/// incoming address, Addr, and ValueType, and constructs the address,
889/// shift-amounts and masks needed to work with a larger value of size
890/// WordSize.
891///
892/// AlignedAddr: Addr rounded down to a multiple of WordSize
893///
894/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
895/// from AlignAddr for it to have the same value as if
896/// ValueType was loaded from Addr.
897///
898/// Mask: Value to mask with the value loaded from AlignAddr to
899/// include only the part that would've been loaded from Addr.
900///
901/// Inv_Mask: The inverse of Mask.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
                                           Value *Addr, Align AddrAlign,
                                           unsigned MinWordSize) {
  PartwordMaskValues PMV;

  Module *M = I->getModule();
  LLVMContext &Ctx = M->getContext();
  const DataLayout &DL = M->getDataLayout();
  unsigned ValueSize = DL.getTypeStoreSize(ValueType);

  PMV.ValueType = PMV.IntValueType = ValueType;
  // FP and vector values are reinterpreted as an integer of the same bit
  // width so the shift/mask arithmetic below can be done on integers.
  if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
    PMV.IntValueType =
        Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());

  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
                                         : ValueType;
  if (PMV.ValueType == PMV.WordType) {
    // Full-word access: no re-alignment or masking needed. Use a zero
    // shift and an all-ones mask so callers can treat both cases uniformly.
    PMV.AlignedAddr = Addr;
    PMV.AlignedAddrAlignment = AddrAlign;
    PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
    PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
    return PMV;
  }

  PMV.AlignedAddrAlignment = Align(MinWordSize);

  assert(ValueSize < MinWordSize);

  PointerType *PtrTy = cast<PointerType>(Addr->getType());
  IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
  Value *PtrLSB; // Byte offset of Addr within its containing aligned word.

  if (AddrAlign < MinWordSize) {
    // Round the address down to a word boundary via llvm.ptrmask, and take
    // the byte offset from the pointer's low bits.
    PMV.AlignedAddr = Builder.CreateIntrinsic(
        Intrinsic::ptrmask, {PtrTy, IntTy},
        {Addr, ConstantInt::getSigned(IntTy, ~(uint64_t)(MinWordSize - 1))},
        nullptr, "AlignedAddr");

    Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
    PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
  } else {
    // If the alignment is high enough, the LSB are known 0.
    PMV.AlignedAddr = Addr;
    PtrLSB = ConstantInt::getNullValue(IntTy);
  }

  if (DL.isLittleEndian()) {
    // turn bytes into bits
    PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  } else {
    // turn bytes into bits, and count from the other side.
    PMV.ShiftAmt = Builder.CreateShl(
        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
  }

  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
  // NOTE(review): `1 << (ValueSize * 8)` is an `int` shift, so ValueSize == 4
  // would overflow a 32-bit int — presumably ValueSize is only ever 1 or 2
  // here; worth confirming against getMinCmpXchgSizeInBits() on all targets.
  PMV.Mask = Builder.CreateShl(
      ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
      "Mask");

  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");

  return PMV;
}
968
969static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
970 const PartwordMaskValues &PMV) {
971 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
972 if (PMV.WordType == PMV.ValueType)
973 return WideWord;
974
975 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
976 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
977 return Builder.CreateBitCast(Trunc, PMV.ValueType);
978}
979
980static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
981 Value *Updated, const PartwordMaskValues &PMV) {
982 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
983 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
984 if (PMV.WordType == PMV.ValueType)
985 return Updated;
986
987 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
988
989 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
990 Value *Shift =
991 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
992 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
993 Value *Or = Builder.CreateOr(And, Shift, "inserted");
994 return Or;
995}
996
/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
///
/// \p Loaded is the full word loaded from the aligned address.
/// \p Shifted_Inc is the RMW operand pre-shifted into its in-word position;
/// \p Inc is the original, unshifted operand. Returns the new full-word
/// value to be stored back.
    IRBuilderBase &Builder, Value *Loaded,
    Value *Shifted_Inc, Value *Inc,
    const PartwordMaskValues &PMV) {
  // TODO: update to use
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
  // to merge bits from two values without requiring PMV.Inv_Mask.
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    // Xchg just replaces the bits under the mask with the shifted operand.
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::And:
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
    // Finally, other ops will operate on the full value, so truncate down to
    // the original size, and expand out again after doing the
    // operation. Bitcasts will be inserted for FP values.
    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}
1055
/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpandImpl::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  // Widen And/Or/Xor and give the target another chance at expanding it.
    tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
    return;
  }
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted = nullptr;
    // Move the operand into its in-word bit position; FP operands are first
    // reinterpreted as integers of the same width.
    Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
    ValOperand_Shifted =
        Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
                          "ValOperand_Shifted");
  }

  // Loop body: apply the masked operation to the loaded full word.
  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
    return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
                                 AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(
        Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment,
        MemOpOrder, SSID, PerformPartwordOp, createCmpXchgInstFun, AI);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  // The loop yields the full old word; extract the sub-word part that the
  // original instruction's users expect.
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
1111
// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
// Returns the replacement (word-sized) atomicrmw; the original instruction
// is erased.
AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

                         Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  // For And, the bits outside the field must be 1 so they are preserved
  // under the wide AND; for Or/Xor the zero-extended padding is already the
  // identity, so the shifted operand can be used directly.
  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
      Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
      AI->getOrdering(), AI->getSyncScopeID());

  copyMetadataForAtomic(*NewAI, *AI);

  // Users of the original instruction get the sub-word slice of the old
  // full-word value.
  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}
1148
bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //        label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //    %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //    %FinalOldVal = trunc i32 %tmp1 to i8
  //    %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //    %Res = insertvalue { i8, i1 } %25, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  ReplacementIRBuilder Builder(CI, *DL);
  LLVMContext &Ctx = Builder.getContext();

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  // A weak cmpxchg is permitted to fail spuriously, so it needs no retry
  // loop: branch straight to the end on either outcome.
  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, verify that the masked-out part of the loaded value
  // has been modified. If it didn't, abort the cmpxchg, since the
  // masked-in part must've.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  // Rebuild the { value, success } result pair in the original narrow type.
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}
1273
1274void AtomicExpandImpl::expandAtomicOpToLLSC(
1275 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1276 AtomicOrdering MemOpOrder,
1277 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1278 ReplacementIRBuilder Builder(I, *DL);
1279 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1280 MemOpOrder, PerformOp);
1281
1282 I->replaceAllUsesWith(Loaded);
1283 I->eraseFromParent();
1284}
1285
1286void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1287 ReplacementIRBuilder Builder(AI, *DL);
1288
1289 PartwordMaskValues PMV =
1290 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1291 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1292
1293 // The value operand must be sign-extended for signed min/max so that the
1294 // target's signed comparison instructions can be used. Otherwise, just
1295 // zero-ext.
1296 Instruction::CastOps CastOp = Instruction::ZExt;
1297 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1298 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1299 CastOp = Instruction::SExt;
1300
1301 Value *ValOperand_Shifted = Builder.CreateShl(
1302 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1303 PMV.ShiftAmt, "ValOperand_Shifted");
1304 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1305 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1306 AI->getOrdering());
1307 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1308 AI->replaceAllUsesWith(FinalOldResult);
1309 AI->eraseFromParent();
1310}
1311
// Lower a part-word cmpxchg to a target-provided masked cmpxchg intrinsic:
// both operands are zero-extended and shifted into their in-word position,
// and the result pair is rebuilt from the returned full old word.
void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
    AtomicCmpXchgInst *CI) {
  ReplacementIRBuilder Builder(CI, *DL);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  // Success bit: the masked portion of the old word equals the shifted
  // compare value.
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}
1339
// Build a load-linked / store-conditional retry loop at the current insert
// point: load, apply \p PerformOp, attempt the conditional store, and retry
// until it succeeds. Returns the loaded (pre-update) value; the builder is
// left positioned at the start of the exit block.
Value *AtomicExpandImpl::insertRMWLLSCLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  // A non-zero store-conditional result means the store failed; retry.
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");

  Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  // Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is
  // hard to predict precise branch weights we mark the branch as "unknown"
  // (50/50) to prevent misleading optimizations.

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}
1394
1395/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1396/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1397/// IR. As a migration step, we convert back to what use to be the standard
1398/// way to represent a pointer cmpxchg so that we can update backends one by
1399/// one.
1400AtomicCmpXchgInst *
1401AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1402 auto *M = CI->getModule();
1403 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1404 M->getDataLayout());
1405
1406 ReplacementIRBuilder Builder(CI, *DL);
1407
1408 Value *Addr = CI->getPointerOperand();
1409
1410 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1411 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1412
1413 auto *NewCI = Builder.CreateAtomicCmpXchg(
1414 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1415 CI->getFailureOrdering(), CI->getSyncScopeID());
1416 NewCI->setVolatile(CI->isVolatile());
1417 NewCI->setWeak(CI->isWeak());
1418 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1419
1420 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1421 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1422
1423 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1424
1425 Value *Res = PoisonValue::get(CI->getType());
1426 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1427 Res = Builder.CreateInsertValue(Res, Succ, 1);
1428
1429 CI->replaceAllUsesWith(Res);
1430 CI->eraseFromParent();
1431 return NewCI;
1432}
1433
// Expand a (weak or strong) cmpxchg into an explicit load-linked /
// store-conditional control-flow graph, inserting leading/trailing target
// fences where shouldInsertFencesForAtomic() requests them. The { value,
// success } result pair is reconstructed from the CFG; always returns true
// (the original instruction is replaced and erased).
bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
  // should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
                                  ? AtomicOrdering::Monotonic
                                  : CI->getMergedOrdering();

  // In implementations which use a barrier to achieve release semantics, we can
  // delay emitting this barrier until we know a store is actually going to be
  // attempted. The cost of this delay is that we need 2 copies of the block
  // emitting the load-linked, affecting code size.
  //
  // Ideally, this logic would be unconditional except for the minsize check
  // since in other cases the extra blocks naturally collapse down to the
  // minimal loop. Unfortunately, this puts too much stress on later
  // optimisations so we avoid emitting the extra logic in those cases too.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     %aligned.addr = ...
  // cmpxchg.start:
  //     %unreleasedload = @load.linked(%aligned.addr)
  //     %unreleasedload.extract = extract value from %unreleasedload
  //     %should_store = icmp eq %unreleasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.releasingstore,
  //                          label %cmpxchg.nostore
  // cmpxchg.releasingstore:
  //     fence?
  //     br label cmpxchg.trystore
  // cmpxchg.trystore:
  //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
  //                            [%releasedload, %cmpxchg.releasedload]
  //     %updated.new = insert %new into %loaded.trystore
  //     %stored = @store_conditional(%updated.new, %aligned.addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.releasedload/%cmpxchg.failure
  // cmpxchg.releasedload:
  //     %releasedload = @load.linked(%aligned.addr)
  //     %releasedload.extract = extract value from %releasedload
  //     %should_store = icmp eq %releasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.nostore:
  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                           [%releasedload,
  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
  //                        [%loaded.trystore, %cmpxchg.trystore]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %loaded = extract value from %loaded.exit
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  ReplacementIRBuilder Builder(CI, *DL);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
                       MDBuilder(F->getContext()).createLikelyBranchWeights());

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  // A weak cmpxchg may fail outright on a spurious SC failure; a strong one
  // must retry (re-loading with or without the release fence as configured).
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB,
                       MDBuilder(F->getContext()).createLikelyBranchWeights());

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
                                       CI->getCompareOperand(), "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(
        ShouldStore, TryStoreBB, NoStoreBB,
        MDBuilder(F->getContext()).createLikelyBranchWeights());
    // Update PHI node in TryStoreBB.
    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // This is the "exit value" from the cmpxchg expansion. It may be of
  // a type wider than the one in the cmpxchg instruction.
  Value *LoadedFull = LoadedExit;

  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}
1683
// Returns true if the atomicrmw provably leaves memory unchanged because
// its constant-integer operand is the identity element of the operation
// (0 for add/sub/or/xor, all-ones for and, and the appropriate extreme
// value for min/max variants).
bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
  // TODO: Add floating point support.
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    // Non-constant operands can't be proven idempotent.
    return false;

  switch (RMWI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();
  case AtomicRMWInst::And:
    return C->isMinusOne();
  case AtomicRMWInst::Min:
    return C->isMaxValue(true);
  case AtomicRMWInst::Max:
    return C->isMinValue(true);
    return C->isMaxValue(false);
    return C->isMinValue(false);
  default:
    return false;
  }
}
1710
1711bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1712 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1713 tryExpandAtomicLoad(ResultingLoad);
1714 return true;
1715 }
1716 return false;
1717}
1718
1719Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1720 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1721 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1722 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1723 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1724 LLVMContext &Ctx = Builder.getContext();
1725 BasicBlock *BB = Builder.GetInsertBlock();
1726 Function *F = BB->getParent();
1727
1728 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1729 //
1730 // The standard expansion we produce is:
1731 // [...]
1732 // %init_loaded = load atomic iN* %addr
1733 // br label %loop
1734 // loop:
1735 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1736 // %new = some_op iN %loaded, %incr
1737 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1738 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1739 // %success = extractvalue { iN, i1 } %pair, 1
1740 // br i1 %success, label %atomicrmw.end, label %loop
1741 // atomicrmw.end:
1742 // [...]
1743 BasicBlock *ExitBB =
1744 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1745 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1746
1747 // The split call above "helpfully" added a branch at the end of BB (to the
1748 // wrong place), but we want a load. It's easiest to just remove
1749 // the branch entirely.
1750 std::prev(BB->end())->eraseFromParent();
1751 Builder.SetInsertPoint(BB);
1752 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1753 // TODO: The initial load must be atomic with the same synchronization scope
1754 // to avoid a data race with concurrent stores. If the instruction being
1755 // emulated is volatile, issue a volatile load.
1756 Builder.CreateBr(LoopBB);
1757
1758 // Start the main loop block now that we've taken care of the preliminaries.
1759 Builder.SetInsertPoint(LoopBB);
1760 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1761 Loaded->addIncoming(InitLoaded, BB);
1762
1763 Value *NewVal = PerformOp(Builder, Loaded);
1764
1765 Value *NewLoaded = nullptr;
1766 Value *Success = nullptr;
1767
1768 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1769 MemOpOrder == AtomicOrdering::Unordered
1770 ? AtomicOrdering::Monotonic
1771 : MemOpOrder,
1772 SSID, Success, NewLoaded, MetadataSrc);
1773 assert(Success && NewLoaded);
1774
1775 Loaded->addIncoming(NewLoaded, LoopBB);
1776
1777 Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);
1778
1779 // Atomic RMW expands to a cmpxchg loop, Since precise branch weights
1780 // cannot be easily determined here, we mark the branch as "unknown" (50/50)
1781 // to prevent misleading optimizations.
1783
1784 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1785 return NewLoaded;
1786}
1787
1788bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1789 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1790 unsigned ValueSize = getAtomicOpSize(CI);
1791
1792 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1793 default:
1794 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1795 case TargetLoweringBase::AtomicExpansionKind::None:
1796 if (ValueSize < MinCASSize)
1797 return expandPartwordCmpXchg(CI);
1798 return false;
1799 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1800 return expandAtomicCmpXchg(CI);
1801 }
1802 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1803 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1804 return true;
1805 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1806 return lowerAtomicCmpXchgInst(CI);
1807 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1808 TLI->emitExpandAtomicCmpXchg(CI);
1809 return true;
1810 }
1811 }
1812}
1813
1814bool AtomicExpandImpl::expandAtomicRMWToCmpXchg(
1815 AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) {
1816 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1817 Builder.setIsFPConstrained(
1818 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1819
1820 // FIXME: If FP exceptions are observable, we should force them off for the
1821 // loop for the FP atomics.
1822 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1823 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1824 AI->getOrdering(), AI->getSyncScopeID(),
1825 [&](IRBuilderBase &Builder, Value *Loaded) {
1826 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1827 AI->getValOperand());
1828 },
1829 CreateCmpXchg, /*MetadataSrc=*/AI);
1830
1831 AI->replaceAllUsesWith(Loaded);
1832 AI->eraseFromParent();
1833 return true;
1834}
1835
1836// In order to use one of the sized library calls such as
1837// __atomic_fetch_add_4, the alignment must be sufficient, the size
1838// must be one of the potentially-specialized sizes, and the value
1839// type must actually exist in C on the target (otherwise, the
1840// function wouldn't actually be defined.)
1841static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1842 const DataLayout &DL) {
1843 // TODO: "LargestSize" is an approximation for "largest type that
1844 // you can express in C". It seems to be the case that int128 is
1845 // supported on all 64-bit platforms, otherwise only up to 64-bit
1846 // integers are supported. If we get this wrong, then we'll try to
1847 // call a sized libcall that doesn't actually exist. There should
1848 // really be some more reliable way in LLVM of determining integer
1849 // sizes which are valid in the target's C ABI...
1850 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1851 return Alignment >= Size &&
1852 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1853 Size <= LargestSize;
1854}
1855
1856void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1857 static const RTLIB::Libcall Libcalls[6] = {
1858 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1859 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1860 unsigned Size = getAtomicOpSize(I);
1861
1862 bool Expanded = expandAtomicOpToLibcall(
1863 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1864 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1865 if (!Expanded)
1866 handleUnsupportedAtomicSize(I, "atomic load");
1867}
1868
1869void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1870 static const RTLIB::Libcall Libcalls[6] = {
1871 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1872 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1873 unsigned Size = getAtomicOpSize(I);
1874
1875 bool Expanded = expandAtomicOpToLibcall(
1876 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1877 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1878 if (!Expanded)
1879 handleUnsupportedAtomicSize(I, "atomic store");
1880}
1881
1882void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
1883 const Twine &AtomicOpName,
1884 Instruction *DiagnosticInst) {
1885 static const RTLIB::Libcall Libcalls[6] = {
1886 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1887 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1888 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1889 unsigned Size = getAtomicOpSize(I);
1890
1891 bool Expanded = expandAtomicOpToLibcall(
1892 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1893 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1894 Libcalls);
1895 if (!Expanded)
1896 handleUnsupportedAtomicSize(I, AtomicOpName, DiagnosticInst);
1897}
1898
1900 static const RTLIB::Libcall LibcallsXchg[6] = {
1901 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1902 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1903 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1904 static const RTLIB::Libcall LibcallsAdd[6] = {
1905 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1906 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1907 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1908 static const RTLIB::Libcall LibcallsSub[6] = {
1909 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1910 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1911 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1912 static const RTLIB::Libcall LibcallsAnd[6] = {
1913 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1914 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1915 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1916 static const RTLIB::Libcall LibcallsOr[6] = {
1917 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1918 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1919 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1920 static const RTLIB::Libcall LibcallsXor[6] = {
1921 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1922 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1923 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1924 static const RTLIB::Libcall LibcallsNand[6] = {
1925 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1926 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1927 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1928
1929 switch (Op) {
1931 llvm_unreachable("Should not have BAD_BINOP.");
1933 return ArrayRef(LibcallsXchg);
1934 case AtomicRMWInst::Add:
1935 return ArrayRef(LibcallsAdd);
1936 case AtomicRMWInst::Sub:
1937 return ArrayRef(LibcallsSub);
1938 case AtomicRMWInst::And:
1939 return ArrayRef(LibcallsAnd);
1940 case AtomicRMWInst::Or:
1941 return ArrayRef(LibcallsOr);
1942 case AtomicRMWInst::Xor:
1943 return ArrayRef(LibcallsXor);
1945 return ArrayRef(LibcallsNand);
1946 case AtomicRMWInst::Max:
1947 case AtomicRMWInst::Min:
1962 // No atomic libcalls are available for these.
1963 return {};
1964 }
1965 llvm_unreachable("Unexpected AtomicRMW operation.");
1966}
1967
1968void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1969 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1970
1971 unsigned Size = getAtomicOpSize(I);
1972
1973 bool Success = false;
1974 if (!Libcalls.empty())
1975 Success = expandAtomicOpToLibcall(
1976 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1977 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1978
1979 // The expansion failed: either there were no libcalls at all for
1980 // the operation (min/max), or there were only size-specialized
1981 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1982 // CAS libcall, via a CAS loop, instead.
1983 if (!Success) {
1984 expandAtomicRMWToCmpXchg(
1985 I, [this, I](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1986 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1987 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded,
1988 Instruction *MetadataSrc) {
1989 // Create the CAS instruction normally...
1990 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1991 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1993 if (MetadataSrc)
1994 copyMetadataForAtomic(*Pair, *MetadataSrc);
1995
1996 Success = Builder.CreateExtractValue(Pair, 1, "success");
1997 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1998
1999 // ...and then expand the CAS into a libcall.
2000 expandAtomicCASToLibcall(
2001 Pair,
2002 "atomicrmw " + AtomicRMWInst::getOperationName(I->getOperation()),
2003 MetadataSrc);
2004 });
2005 }
2006}
2007
2008// A helper routine for the above expandAtomic*ToLibcall functions.
2009//
2010// 'Libcalls' contains an array of enum values for the particular
2011// ATOMIC libcalls to be emitted. All of the other arguments besides
2012// 'I' are extracted from the Instruction subclass by the
2013// caller. Depending on the particular call, some will be null.
2014bool AtomicExpandImpl::expandAtomicOpToLibcall(
2015 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
2016 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
2017 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
2018 assert(Libcalls.size() == 6);
2019
2020 LLVMContext &Ctx = I->getContext();
2021 Module *M = I->getModule();
2022 const DataLayout &DL = M->getDataLayout();
2023 IRBuilder<> Builder(I);
2024 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
2025
2026 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
2027 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
2028
2029 if (M->getTargetTriple().isOSWindows() && M->getTargetTriple().isX86_64() &&
2030 Size == 16) {
2031 // x86_64 Windows passes i128 as an XMM vector; on return, it is in
2032 // XMM0, and as a parameter, it is passed indirectly. The generic lowering
2033 // rules handles this correctly if we pass it as a v2i64 rather than
2034 // i128. This is what Clang does in the frontend for such types as well
2035 // (see WinX86_64ABIInfo::classify in Clang).
2036 SizedIntTy = FixedVectorType::get(Type::getInt64Ty(Ctx), 2);
2037 }
2038
2039 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
2040
2041 // TODO: the "order" argument type is "int", not int32. So
2042 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
2043 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
2044 Constant *OrderingVal =
2045 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
2046 Constant *Ordering2Val = nullptr;
2047 if (CASExpected) {
2048 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
2049 Ordering2Val =
2050 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
2051 }
2052 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
2053
2054 RTLIB::Libcall RTLibType;
2055 if (UseSizedLibcall) {
2056 switch (Size) {
2057 case 1:
2058 RTLibType = Libcalls[1];
2059 break;
2060 case 2:
2061 RTLibType = Libcalls[2];
2062 break;
2063 case 4:
2064 RTLibType = Libcalls[3];
2065 break;
2066 case 8:
2067 RTLibType = Libcalls[4];
2068 break;
2069 case 16:
2070 RTLibType = Libcalls[5];
2071 break;
2072 }
2073 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
2074 RTLibType = Libcalls[0];
2075 } else {
2076 // Can't use sized function, and there's no generic for this
2077 // operation, so give up.
2078 return false;
2079 }
2080
2081 RTLIB::LibcallImpl LibcallImpl = LibcallLowering->getLibcallImpl(RTLibType);
2082 if (LibcallImpl == RTLIB::Unsupported) {
2083 // This target does not implement the requested atomic libcall so give up.
2084 return false;
2085 }
2086
2087 // Build up the function call. There's two kinds. First, the sized
2088 // variants. These calls are going to be one of the following (with
2089 // N=1,2,4,8,16):
2090 // iN __atomic_load_N(iN *ptr, int ordering)
2091 // void __atomic_store_N(iN *ptr, iN val, int ordering)
2092 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
2093 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
2094 // int success_order, int failure_order)
2095 //
2096 // Note that these functions can be used for non-integer atomic
2097 // operations, the values just need to be bitcast to integers on the
2098 // way in and out.
2099 //
2100 // And, then, the generic variants. They look like the following:
2101 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
2102 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
2103 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
2104 // int ordering)
2105 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
2106 // void *desired, int success_order,
2107 // int failure_order)
2108 //
2109 // The different signatures are built up depending on the
2110 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
2111 // variables.
2112
2113 AllocaInst *AllocaCASExpected = nullptr;
2114 AllocaInst *AllocaValue = nullptr;
2115 AllocaInst *AllocaResult = nullptr;
2116
2117 Type *ResultTy;
2119 AttributeList Attr;
2120
2121 // 'size' argument.
2122 if (!UseSizedLibcall) {
2123 // Note, getIntPtrType is assumed equivalent to size_t.
2124 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
2125 }
2126
2127 // 'ptr' argument.
2128 // note: This assumes all address spaces share a common libfunc
2129 // implementation and that addresses are convertable. For systems without
2130 // that property, we'd need to extend this mechanism to support AS-specific
2131 // families of atomic intrinsics.
2132 Value *PtrVal = PointerOperand;
2133 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
2134 Args.push_back(PtrVal);
2135
2136 // 'expected' argument, if present.
2137 if (CASExpected) {
2138 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
2139 AllocaCASExpected->setAlignment(AllocaAlignment);
2140 Builder.CreateLifetimeStart(AllocaCASExpected);
2141 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
2142 Args.push_back(AllocaCASExpected);
2143 }
2144
2145 // 'val' argument ('desired' for cas), if present.
2146 if (ValueOperand) {
2147 if (UseSizedLibcall) {
2148 Value *IntValue =
2149 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
2150 Args.push_back(IntValue);
2151 } else {
2152 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
2153 AllocaValue->setAlignment(AllocaAlignment);
2154 Builder.CreateLifetimeStart(AllocaValue);
2155 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
2156 Args.push_back(AllocaValue);
2157 }
2158 }
2159
2160 // 'ret' argument.
2161 if (!CASExpected && HasResult && !UseSizedLibcall) {
2162 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
2163 AllocaResult->setAlignment(AllocaAlignment);
2164 Builder.CreateLifetimeStart(AllocaResult);
2165 Args.push_back(AllocaResult);
2166 }
2167
2168 // 'ordering' ('success_order' for cas) argument.
2169 Args.push_back(OrderingVal);
2170
2171 // 'failure_order' argument, if present.
2172 if (Ordering2Val)
2173 Args.push_back(Ordering2Val);
2174
2175 // Now, the return type.
2176 if (CASExpected) {
2177 ResultTy = Type::getInt1Ty(Ctx);
2178 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2179 } else if (HasResult && UseSizedLibcall)
2180 ResultTy = SizedIntTy;
2181 else
2182 ResultTy = Type::getVoidTy(Ctx);
2183
2184 // Done with setting up arguments and return types, create the call:
2186 for (Value *Arg : Args)
2187 ArgTys.push_back(Arg->getType());
2188 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2189 FunctionCallee LibcallFn = M->getOrInsertFunction(
2191 Attr);
2192 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2193 Call->setAttributes(Attr);
2194 Value *Result = Call;
2195
2196 // And then, extract the results...
2197 if (ValueOperand && !UseSizedLibcall)
2198 Builder.CreateLifetimeEnd(AllocaValue);
2199
2200 if (CASExpected) {
2201 // The final result from the CAS is {load of 'expected' alloca, bool result
2202 // from call}
2203 Type *FinalResultTy = I->getType();
2204 Value *V = PoisonValue::get(FinalResultTy);
2205 Value *ExpectedOut = Builder.CreateAlignedLoad(
2206 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2207 Builder.CreateLifetimeEnd(AllocaCASExpected);
2208 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2209 V = Builder.CreateInsertValue(V, Result, 1);
2211 } else if (HasResult) {
2212 Value *V;
2213 if (UseSizedLibcall)
2214 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2215 else {
2216 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2217 AllocaAlignment);
2218 Builder.CreateLifetimeEnd(AllocaResult);
2219 }
2221 }
2222 I->eraseFromParent();
2223 return true;
2224}
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, Value *&Success, Value *&NewLoaded, Instruction *MetadataSrc)
Expand Atomic static false unsigned getAtomicOpSize(LoadInst *LI)
static void writeUnsupportedAtomicSizeReason(const TargetLowering *TLI, Inst *I, raw_ostream &OS)
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void copyMetadataForAtomic(Instruction &Dest, const Instruction &Source)
Copy metadata that's safe to preserve when widening atomics.
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
static bool isIdempotentRMW(AtomicRMWInst &RMWI)
Return true if and only if the given instruction does not modify the memory location referenced.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
#define T
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
This file defines the SmallString class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
void setAlignment(Align Align)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
void setWeak(bool IsWeak)
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ FSub
*p = old - v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static LLVM_ABI StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
reverse_iterator rbegin()
Definition BasicBlock.h:477
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
reverse_iterator rend()
Definition BasicBlock.h:479
void setAttributes(AttributeList A)
Set the attributes for this call.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ArrayRef< unsigned > getIndices() const
unsigned getNumIndices() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
BasicBlockListType::iterator iterator
Definition Function.h:70
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:1952
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2662
LLVM_ABI CallInst * CreateLifetimeStart(Value *Ptr)
Create a lifetime.start intrinsic.
LLVM_ABI CallInst * CreateLifetimeEnd(Value *Ptr)
Create a lifetime.end intrinsic.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1918
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1237
UnreachableInst * CreateUnreachable()
Definition IRBuilder.h:1379
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2655
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2218
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition IRBuilder.h:2257
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2359
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1231
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2305
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2520
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2355
void setIsFPConstrained(bool IsCon)
Enable/Disable use of constrained floating point math.
Definition IRBuilder.h:358
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1901
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1532
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2101
LLVMContext & getContext() const
Definition IRBuilder.h:203
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1591
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2213
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2534
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:1965
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition IRBuilder.h:1937
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1613
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2228
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2835
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
LLVM_ABI void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
Tracks which library functions to use for a particular subtarget.
LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
Record a mapping from subtarget to LibcallLoweringInfo.
const LibcallLoweringInfo & getLibcallLowering(const TargetSubtargetInfo &Subtarget) const
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Metadata node.
Definition Metadata.h:1080
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
LLVMContext & getContext() const
Get the global data context.
Definition Module.h:285
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
virtual Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const
Perform a store-conditional operation to Addr.
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform a bit test atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
virtual bool shouldInsertFencesForAtomic(const Instruction *I) const
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
virtual AtomicOrdering atomicOperationOrderAfterFenceSplit(const Instruction *I) const
virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const
Perform a cmpxchg expansion using a target-specific method.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const
Perform a masked atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const
Perform an atomicrmw expansion using a target-specific way.
virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const
virtual void emitExpandAtomicStore(StoreInst *SI) const
Perform an atomic store using a target-specific way.
virtual AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const
Returns how the given atomicrmw should be cast by the IR-level AtomicExpand pass.
virtual bool shouldInsertTrailingSeqCstFenceForAtomicStore(const Instruction *I) const
Whether AtomicExpandPass should automatically insert a seq_cst trailing fence without reducing the or...
virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const
Perform a masked cmpxchg using a target-specific intrinsic.
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual void emitExpandAtomicLoad(LoadInst *LI) const
Perform an atomic load using a target-specific way.
virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass.
virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform an atomicrmw whose result is only used by comparison, using a target-specific intrinsic.
virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
Inserts in the IR a target-specific intrinsic specifying a fence.
virtual LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const
On some platforms, an AtomicRMW that never actually modifies the value (such as fetch_add of 0) can b...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool canInstructionHaveMMRAs(const Instruction &I)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
AtomicOrdering
Atomic ordering for LLVM's memory model.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replace atomic instructions with __atomic_* library calls,...
LLVM_ABI char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:420
Matching combinators.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.