59#define DEBUG_TYPE "atomic-expand"
63class AtomicExpandImpl {
71 bool tryExpandAtomicLoad(
LoadInst *LI);
72 bool expandAtomicLoadToLL(
LoadInst *LI);
73 bool expandAtomicLoadToCmpXchg(
LoadInst *LI);
83 void expandAtomicOpToLLSC(
87 void expandPartwordAtomicRMW(
95 static Value *insertRMWCmpXchgLoop(
111 void expandAtomicLoadToLibcall(
LoadInst *LI);
112 void expandAtomicStoreToLibcall(
StoreInst *LI);
136struct ReplacementIRBuilder :
IRBuilder<InstSimplifyFolder> {
147char AtomicExpandLegacy::ID = 0;
152 "Expand Atomic instructions",
false,
false)
159 const DataLayout &
DL = LI->getModule()->getDataLayout();
160 return DL.getTypeStoreSize(LI->getType());
164 const DataLayout &
DL = SI->getModule()->getDataLayout();
165 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
181template <
typename Inst>
184 Align Alignment =
I->getAlign();
185 return Alignment >=
Size &&
190 const auto *Subtarget =
TM->getSubtargetImpl(
F);
191 if (!Subtarget->enableAtomicExpand())
193 TLI = Subtarget->getTargetLowering();
194 DL = &
F.getParent()->getDataLayout();
201 if (
I.isAtomic() && !isa<FenceInst>(&
I))
204 bool MadeChange =
false;
205 for (
auto *
I : AtomicInsts) {
206 auto LI = dyn_cast<LoadInst>(
I);
207 auto SI = dyn_cast<StoreInst>(
I);
208 auto RMWI = dyn_cast<AtomicRMWInst>(
I);
209 auto CASI = dyn_cast<AtomicCmpXchgInst>(
I);
210 assert((LI || SI || RMWI || CASI) &&
"Unknown atomic instruction");
215 expandAtomicLoadToLibcall(LI);
221 expandAtomicStoreToLibcall(SI);
227 expandAtomicRMWToLibcall(RMWI);
233 expandAtomicCASToLibcall(CASI);
239 if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
240 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
241 I = LI = convertAtomicLoadToIntegerType(LI);
244 TLI->shouldCastAtomicStoreInIR(SI) ==
245 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
246 I =
SI = convertAtomicStoreToIntegerType(SI);
249 TLI->shouldCastAtomicRMWIInIR(RMWI) ==
250 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
251 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
256 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
259 I = CASI = convertCmpXchgToIntegerType(CASI);
264 if (TLI->shouldInsertFencesForAtomic(
I)) {
265 auto FenceOrdering = AtomicOrdering::Monotonic;
267 FenceOrdering = LI->getOrdering();
268 LI->setOrdering(AtomicOrdering::Monotonic);
270 FenceOrdering =
SI->getOrdering();
271 SI->setOrdering(AtomicOrdering::Monotonic);
274 FenceOrdering = RMWI->getOrdering();
275 RMWI->setOrdering(AtomicOrdering::Monotonic);
277 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
278 TargetLoweringBase::AtomicExpansionKind::None &&
286 FenceOrdering = CASI->getMergedOrdering();
287 CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
288 CASI->setFailureOrdering(AtomicOrdering::Monotonic);
291 if (FenceOrdering != AtomicOrdering::Monotonic) {
292 MadeChange |= bracketInstWithFences(
I, FenceOrdering);
294 }
else if (
I->hasAtomicStore() &&
295 TLI->shouldInsertTrailingFenceForAtomicStore(
I)) {
296 auto FenceOrdering = AtomicOrdering::Monotonic;
298 FenceOrdering =
SI->getOrdering();
300 FenceOrdering = RMWI->getOrdering();
301 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
302 TargetLoweringBase::AtomicExpansionKind::LLSC)
304 FenceOrdering = CASI->getSuccessOrdering();
307 if (
auto TrailingFence =
308 TLI->emitTrailingFence(Builder,
I, FenceOrdering)) {
309 TrailingFence->moveAfter(
I);
315 MadeChange |= tryExpandAtomicLoad(LI);
317 MadeChange |= tryExpandAtomicStore(SI);
324 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
327 MadeChange |= tryExpandAtomicRMW(RMWI);
330 MadeChange |= tryExpandAtomicCmpXchg(CASI);
335bool AtomicExpandLegacy::runOnFunction(
Function &
F) {
337 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
342 return AE.run(
F,
TM);
346 return new AtomicExpandLegacy();
353 bool Changed = AE.run(
F,
TM);
360bool AtomicExpandImpl::bracketInstWithFences(
Instruction *
I,
362 ReplacementIRBuilder Builder(
I, *
DL);
364 auto LeadingFence = TLI->emitLeadingFence(Builder,
I, Order);
366 auto TrailingFence = TLI->emitTrailingFence(Builder,
I, Order);
370 TrailingFence->moveAfter(
I);
372 return (LeadingFence || TrailingFence);
378 EVT VT = TLI->getMemValueType(
DL,
T);
389 Type *NewTy = getCorrespondingIntegerType(LI->
getType(),
M->getDataLayout());
391 ReplacementIRBuilder Builder(LI, *
DL);
395 auto *NewLI = Builder.CreateLoad(NewTy,
Addr);
396 NewLI->setAlignment(LI->
getAlign());
399 LLVM_DEBUG(
dbgs() <<
"Replaced " << *LI <<
" with " << *NewLI <<
"\n");
401 Value *NewVal = Builder.CreateBitCast(NewLI, LI->
getType());
408AtomicExpandImpl::convertAtomicXchgToIntegerType(
AtomicRMWInst *RMWI) {
411 getCorrespondingIntegerType(RMWI->
getType(),
M->getDataLayout());
413 ReplacementIRBuilder Builder(RMWI, *
DL);
418 ? Builder.CreatePtrToInt(Val, NewTy)
419 : Builder.CreateBitCast(Val, NewTy);
421 auto *NewRMWI = Builder.CreateAtomicRMW(
424 LLVM_DEBUG(
dbgs() <<
"Replaced " << *RMWI <<
" with " << *NewRMWI <<
"\n");
427 ? Builder.CreateIntToPtr(NewRMWI, RMWI->
getType())
428 : Builder.CreateBitCast(NewRMWI, RMWI->
getType());
434bool AtomicExpandImpl::tryExpandAtomicLoad(
LoadInst *LI) {
435 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
439 expandAtomicOpToLLSC(
445 return expandAtomicLoadToLL(LI);
447 return expandAtomicLoadToCmpXchg(LI);
456bool AtomicExpandImpl::tryExpandAtomicStore(
StoreInst *SI) {
457 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
461 expandAtomicStore(SI);
471bool AtomicExpandImpl::expandAtomicLoadToLL(
LoadInst *LI) {
472 ReplacementIRBuilder Builder(LI, *
DL);
477 Value *Val = TLI->emitLoadLinked(Builder, LI->
getType(),
479 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
487bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(
LoadInst *LI) {
488 ReplacementIRBuilder Builder(LI, *
DL);
497 Value *Pair = Builder.CreateAtomicCmpXchg(
500 Value *Loaded = Builder.CreateExtractValue(Pair, 0,
"loaded");
517 ReplacementIRBuilder Builder(SI, *
DL);
518 auto *
M =
SI->getModule();
519 Type *NewTy = getCorrespondingIntegerType(
SI->getValueOperand()->getType(),
521 Value *NewVal = Builder.CreateBitCast(
SI->getValueOperand(), NewTy);
529 LLVM_DEBUG(
dbgs() <<
"Replaced " << *SI <<
" with " << *NewSI <<
"\n");
530 SI->eraseFromParent();
534void AtomicExpandImpl::expandAtomicStore(
StoreInst *SI) {
541 ReplacementIRBuilder Builder(SI, *
DL);
549 SI->getAlign(), RMWOrdering);
550 SI->eraseFromParent();
553 tryExpandAtomicRMW(AI);
572 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
581bool AtomicExpandImpl::tryExpandAtomicRMW(
AtomicRMWInst *AI) {
588 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
590 if (ValueSize < MinCASSize) {
591 expandPartwordAtomicRMW(AI,
604 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
606 if (ValueSize < MinCASSize) {
607 expandPartwordAtomicRMW(AI,
618 <<
"A compare and swap loop was generated for an atomic "
620 << MemScope <<
" memory scope";
627 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
629 if (ValueSize < MinCASSize) {
634 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
638 expandAtomicRMWToMaskedIntrinsic(AI);
642 TLI->emitBitTestAtomicRMWIntrinsic(AI);
646 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
652 TLI->emitExpandAtomicRMW(AI);
661struct PartwordMaskValues {
663 Type *WordType =
nullptr;
665 Type *IntValueType =
nullptr;
666 Value *AlignedAddr =
nullptr;
667 Align AlignedAddrAlignment;
669 Value *ShiftAmt =
nullptr;
671 Value *Inv_Mask =
nullptr;
676 auto PrintObj = [&
O](
auto *
V) {
683 O <<
"PartwordMaskValues {\n";
685 PrintObj(PMV.WordType);
687 PrintObj(PMV.ValueType);
688 O <<
" AlignedAddr: ";
689 PrintObj(PMV.AlignedAddr);
690 O <<
" AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() <<
'\n';
692 PrintObj(PMV.ShiftAmt);
696 PrintObj(PMV.Inv_Mask);
722 unsigned MinWordSize) {
723 PartwordMaskValues PMV;
728 unsigned ValueSize =
DL.getTypeStoreSize(
ValueType);
730 PMV.ValueType = PMV.IntValueType =
ValueType;
731 if (PMV.ValueType->isFloatingPointTy())
735 PMV.WordType = MinWordSize > ValueSize ?
Type::getIntNTy(Ctx, MinWordSize * 8)
737 if (PMV.ValueType == PMV.WordType) {
738 PMV.AlignedAddr =
Addr;
739 PMV.AlignedAddrAlignment = AddrAlign;
740 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
741 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0,
true);
745 PMV.AlignedAddrAlignment =
Align(MinWordSize);
747 assert(ValueSize < MinWordSize);
750 IntegerType *IntTy =
DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
753 if (AddrAlign < MinWordSize) {
755 Intrinsic::ptrmask, {PtrTy, IntTy},
756 {
Addr, ConstantInt::get(IntTy, ~(
uint64_t)(MinWordSize - 1))},
nullptr,
760 PtrLSB = Builder.
CreateAnd(AddrInt, MinWordSize - 1,
"PtrLSB");
763 PMV.AlignedAddr =
Addr;
767 if (
DL.isLittleEndian()) {
769 PMV.ShiftAmt = Builder.
CreateShl(PtrLSB, 3);
773 Builder.
CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
776 PMV.ShiftAmt = Builder.
CreateTrunc(PMV.ShiftAmt, PMV.WordType,
"ShiftAmt");
778 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
781 PMV.Inv_Mask = Builder.
CreateNot(PMV.Mask,
"Inv_Mask");
787 const PartwordMaskValues &PMV) {
788 assert(WideWord->
getType() == PMV.WordType &&
"Widened type mismatch");
789 if (PMV.WordType == PMV.ValueType)
798 Value *Updated,
const PartwordMaskValues &PMV) {
799 assert(WideWord->
getType() == PMV.WordType &&
"Widened type mismatch");
800 assert(Updated->
getType() == PMV.ValueType &&
"Value type mismatch");
801 if (PMV.WordType == PMV.ValueType)
808 Builder.
CreateShl(ZExt, PMV.ShiftAmt,
"shifted",
true);
820 const PartwordMaskValues &PMV) {
827 Value *FinalVal = Builder.
CreateOr(Loaded_MaskOut, Shifted_Inc);
841 Value *FinalVal = Builder.
CreateOr(Loaded_MaskOut, NewVal_Masked);
874void AtomicExpandImpl::expandPartwordAtomicRMW(
880 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
886 ReplacementIRBuilder Builder(AI, *
DL);
888 PartwordMaskValues PMV =
890 AI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
892 Value *ValOperand_Shifted =
nullptr;
896 Builder.CreateShl(Builder.CreateZExt(AI->
getValOperand(), PMV.WordType),
897 PMV.ShiftAmt,
"ValOperand_Shifted");
907 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
908 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
912 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
913 PMV.AlignedAddrAlignment, MemOpOrder,
924 ReplacementIRBuilder Builder(AI, *
DL);
929 "Unable to widen operation");
931 PartwordMaskValues PMV =
933 AI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
935 Value *ValOperand_Shifted =
937 PMV.ShiftAmt,
"ValOperand_Shifted");
943 Builder.
CreateOr(PMV.Inv_Mask, ValOperand_Shifted,
"AndOperand");
945 NewOperand = ValOperand_Shifted;
948 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1000 ReplacementIRBuilder Builder(CI, *
DL);
1011 std::prev(BB->
end())->eraseFromParent();
1014 PartwordMaskValues PMV =
1016 CI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1019 Value *NewVal_Shifted =
1021 Value *Cmp_Shifted =
1028 Value *InitLoaded_MaskOut = Builder.
CreateAnd(InitLoaded, PMV.Inv_Mask);
1034 Loaded_MaskOut->
addIncoming(InitLoaded_MaskOut, BB);
1037 Value *FullWord_NewVal = Builder.
CreateOr(Loaded_MaskOut, NewVal_Shifted);
1038 Value *FullWord_Cmp = Builder.
CreateOr(Loaded_MaskOut, Cmp_Shifted);
1040 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1068 Loaded_MaskOut->
addIncoming(OldVal_MaskOut, FailureBB);
1083void AtomicExpandImpl::expandAtomicOpToLLSC(
1087 ReplacementIRBuilder Builder(
I, *
DL);
1088 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType,
Addr, AddrAlign,
1089 MemOpOrder, PerformOp);
1091 I->replaceAllUsesWith(Loaded);
1092 I->eraseFromParent();
1095void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(
AtomicRMWInst *AI) {
1096 ReplacementIRBuilder Builder(AI, *
DL);
1098 PartwordMaskValues PMV =
1100 AI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1108 CastOp = Instruction::SExt;
1112 PMV.ShiftAmt,
"ValOperand_Shifted");
1113 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1114 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1121void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1123 ReplacementIRBuilder Builder(CI, *
DL);
1127 CI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1135 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1136 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1142 CmpVal_Shifted, Builder.
CreateAnd(OldVal, PMV.Mask),
"Success");
1149Value *AtomicExpandImpl::insertRMWLLSCLoop(
1158 F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1159 "Expected at least natural alignment at this point.");
1179 std::prev(BB->
end())->eraseFromParent();
1185 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy,
Addr, MemOpOrder);
1187 Value *NewVal = PerformOp(Builder, Loaded);
1189 Value *StoreSuccess =
1190 TLI->emitStoreConditional(Builder, NewVal,
Addr, MemOpOrder);
1208 M->getDataLayout());
1210 ReplacementIRBuilder Builder(CI, *
DL);
1222 LLVM_DEBUG(
dbgs() <<
"Replaced " << *CI <<
" with " << *NewCI <<
"\n");
1249 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1263 bool HasReleasedLoadBB = !CI->
isWeak() && ShouldInsertFencesForAtomic &&
1270 bool UseUnconditionalReleaseBarrier =
F->hasMinSize() && !CI->
isWeak();
1324 auto ReleasedLoadBB =
1328 auto ReleasingStoreBB =
1332 ReplacementIRBuilder Builder(CI, *
DL);
1337 std::prev(BB->
end())->eraseFromParent();
1339 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1340 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1342 PartwordMaskValues PMV =
1344 CI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1349 Value *UnreleasedLoad =
1350 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1351 Value *UnreleasedLoadExtract =
1358 Builder.
CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1361 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1362 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1367 Builder.
CreatePHI(PMV.WordType, 2,
"loaded.trystore");
1368 LoadedTryStore->
addIncoming(UnreleasedLoad, ReleasingStoreBB);
1369 Value *NewValueInsert =
1371 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1372 PMV.AlignedAddr, MemOpOrder);
1375 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1377 CI->
isWeak() ? FailureBB : RetryBB);
1381 if (HasReleasedLoadBB) {
1383 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1390 Builder.
CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1392 LoadedTryStore->
addIncoming(SecondLoad, ReleasedLoadBB);
1399 if (ShouldInsertFencesForAtomic ||
1400 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1401 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1407 LoadedNoStore->
addIncoming(UnreleasedLoad, StartBB);
1408 if (HasReleasedLoadBB)
1409 LoadedNoStore->
addIncoming(SecondLoad, ReleasedLoadBB);
1414 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1420 LoadedFailure->
addIncoming(LoadedNoStore, NoStoreBB);
1422 LoadedFailure->
addIncoming(LoadedTryStore, TryStoreBB);
1423 if (ShouldInsertFencesForAtomic)
1424 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1434 LoadedExit->
addIncoming(LoadedTryStore, SuccessBB);
1435 LoadedExit->
addIncoming(LoadedFailure, FailureBB);
1442 Value *LoadedFull = LoadedExit;
1456 "weird extraction from { iN, i1 }");
1467 for (
auto *EV : PrunedInsts)
1484bool AtomicExpandImpl::isIdempotentRMW(
AtomicRMWInst *RMWI) {
1497 return C->isMinusOne();
1504bool AtomicExpandImpl::simplifyIdempotentRMW(
AtomicRMWInst *RMWI) {
1505 if (
auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1506 tryExpandAtomicLoad(ResultingLoad);
1512Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1543 std::prev(BB->
end())->eraseFromParent();
1553 Value *NewVal = PerformOp(Builder, Loaded);
1555 Value *NewLoaded =
nullptr;
1558 CreateCmpXchg(Builder,
Addr, Loaded, NewVal, AddrAlign,
1574 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1577 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1581 if (ValueSize < MinCASSize)
1582 return expandPartwordCmpXchg(CI);
1585 return expandAtomicCmpXchg(CI);
1588 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1599 Builder.setIsFPConstrained(
1604 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1608 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1609 AI->getValOperand());
1632 unsigned LargestSize =
DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1633 return Alignment >=
Size &&
1635 Size <= LargestSize;
1638void AtomicExpandImpl::expandAtomicLoadToLibcall(
LoadInst *
I) {
1640 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1641 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1644 bool expanded = expandAtomicOpToLibcall(
1645 I,
Size,
I->getAlign(),
I->getPointerOperand(),
nullptr,
nullptr,
1651void AtomicExpandImpl::expandAtomicStoreToLibcall(
StoreInst *
I) {
1653 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1654 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1657 bool expanded = expandAtomicOpToLibcall(
1658 I,
Size,
I->getAlign(),
I->getPointerOperand(),
I->getValueOperand(),
1666 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1667 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1668 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1671 bool expanded = expandAtomicOpToLibcall(
1672 I,
Size,
I->getAlign(),
I->getPointerOperand(),
I->getNewValOperand(),
1673 I->getCompareOperand(),
I->getSuccessOrdering(),
I->getFailureOrdering(),
1681 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1682 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1683 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1685 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1686 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1687 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1689 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1690 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1691 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1693 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1694 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1695 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1697 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1698 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1699 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1701 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1702 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1703 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1705 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1706 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1707 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1742void AtomicExpandImpl::expandAtomicRMWToLibcall(
AtomicRMWInst *
I) {
1748 if (!Libcalls.
empty())
1749 Success = expandAtomicOpToLibcall(
1750 I,
Size,
I->getAlign(),
I->getPointerOperand(),
I->getValOperand(),
1764 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1770 expandAtomicCASToLibcall(Pair);
1781bool AtomicExpandImpl::expandAtomicOpToLibcall(
1791 IRBuilder<> AllocaBuilder(&
I->getFunction()->getEntryBlock().front());
1796 const Align AllocaAlignment =
DL.getPrefTypeAlign(SizedIntTy);
1813 if (UseSizedLibcall) {
1816 RTLibType = Libcalls[1];
1819 RTLibType = Libcalls[2];
1822 RTLibType = Libcalls[3];
1825 RTLibType = Libcalls[4];
1828 RTLibType = Libcalls[5];
1831 }
else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1832 RTLibType = Libcalls[0];
1839 if (!TLI->getLibcallName(RTLibType)) {
1879 if (!UseSizedLibcall) {
1881 Args.push_back(ConstantInt::get(
DL.getIntPtrType(Ctx),
Size));
1889 Value *PtrVal = PointerOperand;
1891 Args.push_back(PtrVal);
1895 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->
getType());
1899 Args.push_back(AllocaCASExpected);
1904 if (UseSizedLibcall) {
1907 Args.push_back(IntValue);
1909 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->
getType());
1913 Args.push_back(AllocaValue);
1918 if (!CASExpected && HasResult && !UseSizedLibcall) {
1919 AllocaResult = AllocaBuilder.CreateAlloca(
I->getType());
1922 Args.push_back(AllocaResult);
1926 Args.push_back(OrderingVal);
1930 Args.push_back(Ordering2Val);
1936 }
else if (HasResult && UseSizedLibcall)
1937 ResultTy = SizedIntTy;
1943 for (
Value *Arg : Args)
1947 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1949 Call->setAttributes(Attr);
1953 if (ValueOperand && !UseSizedLibcall)
1959 Type *FinalResultTy =
I->getType();
1962 CASExpected->
getType(), AllocaCASExpected, AllocaAlignment);
1966 I->replaceAllUsesWith(V);
1967 }
else if (HasResult) {
1969 if (UseSizedLibcall)
1976 I->replaceAllUsesWith(V);
1978 I->eraseFromParent();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
Expand Atomic static false unsigned getAtomicOpSize(LoadInst *LI)
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, Value *&Success, Value *&NewLoaded)
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
#define LLVM_ATTRIBUTE_UNUSED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
an instruction to allocate memory on the stack
void setAlignment(Align Align)
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
An instruction that atomically checks whether a specified value is in a memory location,...
Value * getNewValOperand()
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
void setWeak(bool IsWeak)
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
Value * getCompareOperand()
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Value * getPointerOperand()
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Min
*p = old <signed v ? old : v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
AttributeList addRetAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add a return value attribute to the list.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Common base class shared among various IRBuilders.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
UnreachableInst * CreateUnreachable()
CallInst * CreateLifetimeStart(Value *Ptr, ConstantInt *Size=nullptr)
Create a lifetime.start intrinsic.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
BasicBlock::iterator GetInsertPoint() const
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateNot(Value *V, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
LLVMContext & getContext() const
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
CallInst * CreateLifetimeEnd(Value *Ptr, ConstantInt *Size=nullptr)
Create a lifetime.end intrinsic.
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
const BasicBlock * getParent() const
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
void initializeAtomicExpandLegacyPass(PassRegistry &)
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg)
Expand an atomic RMW instruction into a loop utilizing cmpxchg.
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls,...
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
This struct is a compact representation of a valid (non-zero power of two) alignment.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.