60#define DEBUG_TYPE "atomic-expand"
64class AtomicExpandImpl {
72 bool tryExpandAtomicLoad(
LoadInst *LI);
73 bool expandAtomicLoadToLL(
LoadInst *LI);
74 bool expandAtomicLoadToCmpXchg(
LoadInst *LI);
84 void expandAtomicOpToLLSC(
88 void expandPartwordAtomicRMW(
96 static Value *insertRMWCmpXchgLoop(
112 void expandAtomicLoadToLibcall(
LoadInst *LI);
113 void expandAtomicStoreToLibcall(
StoreInst *LI);
137struct ReplacementIRBuilder
138 :
IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
149 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
152 MMRAMD =
I->getMetadata(LLVMContext::MD_mmra);
157 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
163char AtomicExpandLegacy::ID = 0;
168 "Expand Atomic instructions",
false,
false)
175 const DataLayout &
DL = LI->getModule()->getDataLayout();
176 return DL.getTypeStoreSize(LI->getType());
180 const DataLayout &
DL = SI->getModule()->getDataLayout();
181 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
197template <
typename Inst>
200 Align Alignment =
I->getAlign();
201 return Alignment >=
Size &&
206 const auto *Subtarget =
TM->getSubtargetImpl(
F);
207 if (!Subtarget->enableAtomicExpand())
209 TLI = Subtarget->getTargetLowering();
210 DL = &
F.getParent()->getDataLayout();
217 if (
I.isAtomic() && !isa<FenceInst>(&
I))
220 bool MadeChange =
false;
221 for (
auto *
I : AtomicInsts) {
222 auto LI = dyn_cast<LoadInst>(
I);
223 auto SI = dyn_cast<StoreInst>(
I);
224 auto RMWI = dyn_cast<AtomicRMWInst>(
I);
225 auto CASI = dyn_cast<AtomicCmpXchgInst>(
I);
226 assert((LI || SI || RMWI || CASI) &&
"Unknown atomic instruction");
231 expandAtomicLoadToLibcall(LI);
237 expandAtomicStoreToLibcall(SI);
243 expandAtomicRMWToLibcall(RMWI);
249 expandAtomicCASToLibcall(CASI);
255 if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
256 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
257 I = LI = convertAtomicLoadToIntegerType(LI);
260 TLI->shouldCastAtomicStoreInIR(SI) ==
261 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
262 I =
SI = convertAtomicStoreToIntegerType(SI);
265 TLI->shouldCastAtomicRMWIInIR(RMWI) ==
266 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
267 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
272 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
275 I = CASI = convertCmpXchgToIntegerType(CASI);
280 if (TLI->shouldInsertFencesForAtomic(
I)) {
281 auto FenceOrdering = AtomicOrdering::Monotonic;
283 FenceOrdering = LI->getOrdering();
284 LI->setOrdering(AtomicOrdering::Monotonic);
286 FenceOrdering =
SI->getOrdering();
287 SI->setOrdering(AtomicOrdering::Monotonic);
290 FenceOrdering = RMWI->getOrdering();
291 RMWI->setOrdering(AtomicOrdering::Monotonic);
293 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
294 TargetLoweringBase::AtomicExpansionKind::None &&
302 FenceOrdering = CASI->getMergedOrdering();
303 CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
304 CASI->setFailureOrdering(AtomicOrdering::Monotonic);
307 if (FenceOrdering != AtomicOrdering::Monotonic) {
308 MadeChange |= bracketInstWithFences(
I, FenceOrdering);
310 }
else if (
I->hasAtomicStore() &&
311 TLI->shouldInsertTrailingFenceForAtomicStore(
I)) {
312 auto FenceOrdering = AtomicOrdering::Monotonic;
314 FenceOrdering =
SI->getOrdering();
316 FenceOrdering = RMWI->getOrdering();
317 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
318 TargetLoweringBase::AtomicExpansionKind::LLSC)
320 FenceOrdering = CASI->getSuccessOrdering();
323 if (
auto TrailingFence =
324 TLI->emitTrailingFence(Builder,
I, FenceOrdering)) {
325 TrailingFence->moveAfter(
I);
331 MadeChange |= tryExpandAtomicLoad(LI);
333 MadeChange |= tryExpandAtomicStore(SI);
340 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
343 MadeChange |= tryExpandAtomicRMW(RMWI);
346 MadeChange |= tryExpandAtomicCmpXchg(CASI);
351bool AtomicExpandLegacy::runOnFunction(
Function &
F) {
353 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
358 return AE.run(
F,
TM);
362 return new AtomicExpandLegacy();
369 bool Changed = AE.run(
F,
TM);
376bool AtomicExpandImpl::bracketInstWithFences(
Instruction *
I,
378 ReplacementIRBuilder Builder(
I, *
DL);
380 auto LeadingFence = TLI->emitLeadingFence(Builder,
I, Order);
382 auto TrailingFence = TLI->emitTrailingFence(Builder,
I, Order);
386 TrailingFence->moveAfter(
I);
388 return (LeadingFence || TrailingFence);
394 EVT VT = TLI->getMemValueType(
DL,
T);
405 Type *NewTy = getCorrespondingIntegerType(LI->
getType(),
M->getDataLayout());
407 ReplacementIRBuilder Builder(LI, *
DL);
411 auto *NewLI = Builder.CreateLoad(NewTy,
Addr);
412 NewLI->setAlignment(LI->
getAlign());
415 LLVM_DEBUG(
dbgs() <<
"Replaced " << *LI <<
" with " << *NewLI <<
"\n");
417 Value *NewVal = Builder.CreateBitCast(NewLI, LI->
getType());
424AtomicExpandImpl::convertAtomicXchgToIntegerType(
AtomicRMWInst *RMWI) {
427 getCorrespondingIntegerType(RMWI->
getType(),
M->getDataLayout());
429 ReplacementIRBuilder Builder(RMWI, *
DL);
434 ? Builder.CreatePtrToInt(Val, NewTy)
435 : Builder.CreateBitCast(Val, NewTy);
441 LLVM_DEBUG(
dbgs() <<
"Replaced " << *RMWI <<
" with " << *NewRMWI <<
"\n");
444 ? Builder.CreateIntToPtr(NewRMWI, RMWI->
getType())
445 : Builder.CreateBitCast(NewRMWI, RMWI->
getType());
451bool AtomicExpandImpl::tryExpandAtomicLoad(
LoadInst *LI) {
452 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
456 expandAtomicOpToLLSC(
462 return expandAtomicLoadToLL(LI);
464 return expandAtomicLoadToCmpXchg(LI);
473bool AtomicExpandImpl::tryExpandAtomicStore(
StoreInst *SI) {
474 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
478 expandAtomicStore(SI);
488bool AtomicExpandImpl::expandAtomicLoadToLL(
LoadInst *LI) {
489 ReplacementIRBuilder Builder(LI, *
DL);
494 Value *Val = TLI->emitLoadLinked(Builder, LI->
getType(),
496 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
504bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(
LoadInst *LI) {
505 ReplacementIRBuilder Builder(LI, *
DL);
514 Value *Pair = Builder.CreateAtomicCmpXchg(
517 Value *Loaded = Builder.CreateExtractValue(Pair, 0,
"loaded");
534 ReplacementIRBuilder Builder(SI, *
DL);
535 auto *
M =
SI->getModule();
536 Type *NewTy = getCorrespondingIntegerType(
SI->getValueOperand()->getType(),
538 Value *NewVal = Builder.CreateBitCast(
SI->getValueOperand(), NewTy);
546 LLVM_DEBUG(
dbgs() <<
"Replaced " << *SI <<
" with " << *NewSI <<
"\n");
547 SI->eraseFromParent();
551void AtomicExpandImpl::expandAtomicStore(
StoreInst *SI) {
558 ReplacementIRBuilder Builder(SI, *
DL);
566 SI->getAlign(), RMWOrdering);
567 SI->eraseFromParent();
570 tryExpandAtomicRMW(AI);
589 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
598bool AtomicExpandImpl::tryExpandAtomicRMW(
AtomicRMWInst *AI) {
605 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
607 if (ValueSize < MinCASSize) {
608 expandPartwordAtomicRMW(AI,
621 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
623 if (ValueSize < MinCASSize) {
624 expandPartwordAtomicRMW(AI,
635 <<
"A compare and swap loop was generated for an atomic "
637 << MemScope <<
" memory scope";
644 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
646 if (ValueSize < MinCASSize) {
651 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
655 expandAtomicRMWToMaskedIntrinsic(AI);
659 TLI->emitBitTestAtomicRMWIntrinsic(AI);
663 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
669 TLI->emitExpandAtomicRMW(AI);
678struct PartwordMaskValues {
680 Type *WordType =
nullptr;
682 Type *IntValueType =
nullptr;
683 Value *AlignedAddr =
nullptr;
684 Align AlignedAddrAlignment;
686 Value *ShiftAmt =
nullptr;
688 Value *Inv_Mask =
nullptr;
693 auto PrintObj = [&
O](
auto *
V) {
700 O <<
"PartwordMaskValues {\n";
702 PrintObj(PMV.WordType);
704 PrintObj(PMV.ValueType);
705 O <<
" AlignedAddr: ";
706 PrintObj(PMV.AlignedAddr);
707 O <<
" AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() <<
'\n';
709 PrintObj(PMV.ShiftAmt);
713 PrintObj(PMV.Inv_Mask);
739 unsigned MinWordSize) {
740 PartwordMaskValues PMV;
745 unsigned ValueSize =
DL.getTypeStoreSize(
ValueType);
747 PMV.ValueType = PMV.IntValueType =
ValueType;
748 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
752 PMV.WordType = MinWordSize > ValueSize ?
Type::getIntNTy(Ctx, MinWordSize * 8)
754 if (PMV.ValueType == PMV.WordType) {
755 PMV.AlignedAddr =
Addr;
756 PMV.AlignedAddrAlignment = AddrAlign;
757 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
758 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0,
true);
762 PMV.AlignedAddrAlignment =
Align(MinWordSize);
764 assert(ValueSize < MinWordSize);
767 IntegerType *IntTy =
DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
770 if (AddrAlign < MinWordSize) {
772 Intrinsic::ptrmask, {PtrTy, IntTy},
773 {
Addr, ConstantInt::get(IntTy, ~(
uint64_t)(MinWordSize - 1))},
nullptr,
777 PtrLSB = Builder.
CreateAnd(AddrInt, MinWordSize - 1,
"PtrLSB");
780 PMV.AlignedAddr =
Addr;
784 if (
DL.isLittleEndian()) {
786 PMV.ShiftAmt = Builder.
CreateShl(PtrLSB, 3);
790 Builder.
CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
793 PMV.ShiftAmt = Builder.
CreateTrunc(PMV.ShiftAmt, PMV.WordType,
"ShiftAmt");
795 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
798 PMV.Inv_Mask = Builder.
CreateNot(PMV.Mask,
"Inv_Mask");
804 const PartwordMaskValues &PMV) {
805 assert(WideWord->
getType() == PMV.WordType &&
"Widened type mismatch");
806 if (PMV.WordType == PMV.ValueType)
815 Value *Updated,
const PartwordMaskValues &PMV) {
816 assert(WideWord->
getType() == PMV.WordType &&
"Widened type mismatch");
817 assert(Updated->
getType() == PMV.ValueType &&
"Value type mismatch");
818 if (PMV.WordType == PMV.ValueType)
825 Builder.
CreateShl(ZExt, PMV.ShiftAmt,
"shifted",
true);
837 const PartwordMaskValues &PMV) {
844 Value *FinalVal = Builder.
CreateOr(Loaded_MaskOut, Shifted_Inc);
858 Value *FinalVal = Builder.
CreateOr(Loaded_MaskOut, NewVal_Masked);
891void AtomicExpandImpl::expandPartwordAtomicRMW(
897 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
903 ReplacementIRBuilder Builder(AI, *
DL);
905 PartwordMaskValues PMV =
907 AI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
909 Value *ValOperand_Shifted =
nullptr;
913 Builder.CreateShl(Builder.CreateZExt(AI->
getValOperand(), PMV.WordType),
914 PMV.ShiftAmt,
"ValOperand_Shifted");
924 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
925 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
929 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
930 PMV.AlignedAddrAlignment, MemOpOrder,
941 ReplacementIRBuilder Builder(AI, *
DL);
946 "Unable to widen operation");
948 PartwordMaskValues PMV =
950 AI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
952 Value *ValOperand_Shifted =
954 PMV.ShiftAmt,
"ValOperand_Shifted");
960 Builder.
CreateOr(ValOperand_Shifted, PMV.Inv_Mask,
"AndOperand");
962 NewOperand = ValOperand_Shifted;
965 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1017 ReplacementIRBuilder Builder(CI, *
DL);
1028 std::prev(BB->
end())->eraseFromParent();
1031 PartwordMaskValues PMV =
1033 CI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1036 Value *NewVal_Shifted =
1038 Value *Cmp_Shifted =
1045 Value *InitLoaded_MaskOut = Builder.
CreateAnd(InitLoaded, PMV.Inv_Mask);
1051 Loaded_MaskOut->
addIncoming(InitLoaded_MaskOut, BB);
1054 Value *FullWord_NewVal = Builder.
CreateOr(Loaded_MaskOut, NewVal_Shifted);
1055 Value *FullWord_Cmp = Builder.
CreateOr(Loaded_MaskOut, Cmp_Shifted);
1057 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1085 Loaded_MaskOut->
addIncoming(OldVal_MaskOut, FailureBB);
1100void AtomicExpandImpl::expandAtomicOpToLLSC(
1104 ReplacementIRBuilder Builder(
I, *
DL);
1105 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType,
Addr, AddrAlign,
1106 MemOpOrder, PerformOp);
1108 I->replaceAllUsesWith(Loaded);
1109 I->eraseFromParent();
1112void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(
AtomicRMWInst *AI) {
1113 ReplacementIRBuilder Builder(AI, *
DL);
1115 PartwordMaskValues PMV =
1117 AI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1125 CastOp = Instruction::SExt;
1129 PMV.ShiftAmt,
"ValOperand_Shifted");
1130 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1131 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1138void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1140 ReplacementIRBuilder Builder(CI, *
DL);
1144 CI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1152 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1153 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1159 CmpVal_Shifted, Builder.
CreateAnd(OldVal, PMV.Mask),
"Success");
1166Value *AtomicExpandImpl::insertRMWLLSCLoop(
1175 F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1176 "Expected at least natural alignment at this point.");
1196 std::prev(BB->
end())->eraseFromParent();
1202 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy,
Addr, MemOpOrder);
1204 Value *NewVal = PerformOp(Builder, Loaded);
1206 Value *StoreSuccess =
1207 TLI->emitStoreConditional(Builder, NewVal,
Addr, MemOpOrder);
1225 M->getDataLayout());
1227 ReplacementIRBuilder Builder(CI, *
DL);
1239 LLVM_DEBUG(
dbgs() <<
"Replaced " << *CI <<
" with " << *NewCI <<
"\n");
1266 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1280 bool HasReleasedLoadBB = !CI->
isWeak() && ShouldInsertFencesForAtomic &&
1287 bool UseUnconditionalReleaseBarrier =
F->hasMinSize() && !CI->
isWeak();
1341 auto ReleasedLoadBB =
1345 auto ReleasingStoreBB =
1349 ReplacementIRBuilder Builder(CI, *
DL);
1354 std::prev(BB->
end())->eraseFromParent();
1356 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1357 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1359 PartwordMaskValues PMV =
1361 CI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1366 Value *UnreleasedLoad =
1367 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1368 Value *UnreleasedLoadExtract =
1375 Builder.
CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1378 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1379 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1384 Builder.
CreatePHI(PMV.WordType, 2,
"loaded.trystore");
1385 LoadedTryStore->
addIncoming(UnreleasedLoad, ReleasingStoreBB);
1386 Value *NewValueInsert =
1388 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1389 PMV.AlignedAddr, MemOpOrder);
1392 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1394 CI->
isWeak() ? FailureBB : RetryBB);
1398 if (HasReleasedLoadBB) {
1400 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1407 Builder.
CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1409 LoadedTryStore->
addIncoming(SecondLoad, ReleasedLoadBB);
1416 if (ShouldInsertFencesForAtomic ||
1417 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1418 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1424 LoadedNoStore->
addIncoming(UnreleasedLoad, StartBB);
1425 if (HasReleasedLoadBB)
1426 LoadedNoStore->
addIncoming(SecondLoad, ReleasedLoadBB);
1431 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1437 LoadedFailure->
addIncoming(LoadedNoStore, NoStoreBB);
1439 LoadedFailure->
addIncoming(LoadedTryStore, TryStoreBB);
1440 if (ShouldInsertFencesForAtomic)
1441 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1451 LoadedExit->
addIncoming(LoadedTryStore, SuccessBB);
1452 LoadedExit->
addIncoming(LoadedFailure, FailureBB);
1459 Value *LoadedFull = LoadedExit;
1473 "weird extraction from { iN, i1 }");
1484 for (
auto *EV : PrunedInsts)
1501bool AtomicExpandImpl::isIdempotentRMW(
AtomicRMWInst *RMWI) {
1514 return C->isMinusOne();
1521bool AtomicExpandImpl::simplifyIdempotentRMW(
AtomicRMWInst *RMWI) {
1522 if (
auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1523 tryExpandAtomicLoad(ResultingLoad);
1529Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1560 std::prev(BB->
end())->eraseFromParent();
1570 Value *NewVal = PerformOp(Builder, Loaded);
1572 Value *NewLoaded =
nullptr;
1575 CreateCmpXchg(Builder,
Addr, Loaded, NewVal, AddrAlign,
1591 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1594 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1598 if (ValueSize < MinCASSize)
1599 return expandPartwordCmpXchg(CI);
1602 return expandAtomicCmpXchg(CI);
1605 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1616 Builder.setIsFPConstrained(
1621 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1625 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1626 AI->getValOperand());
1649 unsigned LargestSize =
DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1650 return Alignment >=
Size &&
1652 Size <= LargestSize;
1655void AtomicExpandImpl::expandAtomicLoadToLibcall(
LoadInst *
I) {
1657 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1658 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1661 bool expanded = expandAtomicOpToLibcall(
1662 I,
Size,
I->getAlign(),
I->getPointerOperand(),
nullptr,
nullptr,
1668void AtomicExpandImpl::expandAtomicStoreToLibcall(
StoreInst *
I) {
1670 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1671 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1674 bool expanded = expandAtomicOpToLibcall(
1675 I,
Size,
I->getAlign(),
I->getPointerOperand(),
I->getValueOperand(),
1683 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1684 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1685 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1688 bool expanded = expandAtomicOpToLibcall(
1689 I,
Size,
I->getAlign(),
I->getPointerOperand(),
I->getNewValOperand(),
1690 I->getCompareOperand(),
I->getSuccessOrdering(),
I->getFailureOrdering(),
1698 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1699 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1700 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1702 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1703 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1704 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1706 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1707 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1708 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1710 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1711 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1712 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1714 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1715 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1716 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1718 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1719 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1720 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1722 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1723 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1724 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1759void AtomicExpandImpl::expandAtomicRMWToLibcall(
AtomicRMWInst *
I) {
1765 if (!Libcalls.
empty())
1766 Success = expandAtomicOpToLibcall(
1767 I,
Size,
I->getAlign(),
I->getPointerOperand(),
I->getValOperand(),
1781 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1787 expandAtomicCASToLibcall(Pair);
1798bool AtomicExpandImpl::expandAtomicOpToLibcall(
1808 IRBuilder<> AllocaBuilder(&
I->getFunction()->getEntryBlock().front());
1813 const Align AllocaAlignment =
DL.getPrefTypeAlign(SizedIntTy);
1830 if (UseSizedLibcall) {
1833 RTLibType = Libcalls[1];
1836 RTLibType = Libcalls[2];
1839 RTLibType = Libcalls[3];
1842 RTLibType = Libcalls[4];
1845 RTLibType = Libcalls[5];
1848 }
else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1849 RTLibType = Libcalls[0];
1856 if (!TLI->getLibcallName(RTLibType)) {
1896 if (!UseSizedLibcall) {
1898 Args.push_back(ConstantInt::get(
DL.getIntPtrType(Ctx),
Size));
1906 Value *PtrVal = PointerOperand;
1908 Args.push_back(PtrVal);
1912 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->
getType());
1916 Args.push_back(AllocaCASExpected);
1921 if (UseSizedLibcall) {
1924 Args.push_back(IntValue);
1926 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->
getType());
1930 Args.push_back(AllocaValue);
1935 if (!CASExpected && HasResult && !UseSizedLibcall) {
1936 AllocaResult = AllocaBuilder.CreateAlloca(
I->getType());
1939 Args.push_back(AllocaResult);
1943 Args.push_back(OrderingVal);
1947 Args.push_back(Ordering2Val);
1953 }
else if (HasResult && UseSizedLibcall)
1954 ResultTy = SizedIntTy;
1960 for (
Value *Arg : Args)
1964 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1966 Call->setAttributes(Attr);
1970 if (ValueOperand && !UseSizedLibcall)
1976 Type *FinalResultTy =
I->getType();
1979 CASExpected->
getType(), AllocaCASExpected, AllocaAlignment);
1983 I->replaceAllUsesWith(V);
1984 }
else if (HasResult) {
1986 if (UseSizedLibcall)
1993 I->replaceAllUsesWith(V);
1995 I->eraseFromParent();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
Expand Atomic static false unsigned getAtomicOpSize(LoadInst *LI)
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, Value *&Success, Value *&NewLoaded)
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
#define LLVM_ATTRIBUTE_UNUSED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
Module.h This file contains the declarations for the Module class.
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
an instruction to allocate memory on the stack
void setAlignment(Align Align)
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
An instruction that atomically checks whether a specified value is in a memory location,...
Value * getNewValOperand()
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
void setWeak(bool IsWeak)
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
Value * getCompareOperand()
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Value * getPointerOperand()
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Min
*p = old <signed v ? old : v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
AttributeList addRetAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add a return value attribute to the list.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Common base class shared among various IRBuilders.
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
UnreachableInst * CreateUnreachable()
CallInst * CreateLifetimeStart(Value *Ptr, ConstantInt *Size=nullptr)
Create a lifetime.start intrinsic.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
BasicBlock::iterator GetInsertPoint() const
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateNot(Value *V, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
void setIsFPConstrained(bool IsCon)
Enable/Disable use of constrained floating point math.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
LLVMContext & getContext() const
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
CallInst * CreateLifetimeEnd(Value *Ptr, ConstantInt *Size=nullptr)
Create a lifetime.end intrinsic.
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
const BasicBlock * getParent() const
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
void initializeAtomicExpandLegacyPass(PassRegistry &)
bool canInstructionHaveMMRAs(const Instruction &I)
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg)
Expand an atomic RMW instruction into a loop utilizing cmpxchg.
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls,...
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
This struct is a compact representation of a valid (non-zero power of two) alignment.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.