59#define DEBUG_TYPE "atomic-expand"
63class AtomicExpandImpl {
71 bool tryExpandAtomicLoad(
LoadInst *LI);
72 bool expandAtomicLoadToLL(
LoadInst *LI);
73 bool expandAtomicLoadToCmpXchg(
LoadInst *LI);
83 void expandAtomicOpToLLSC(
87 void expandPartwordAtomicRMW(
95 static Value *insertRMWCmpXchgLoop(
111 void expandAtomicLoadToLibcall(
LoadInst *LI);
112 void expandAtomicStoreToLibcall(
StoreInst *LI);
136struct ReplacementIRBuilder :
IRBuilder<InstSimplifyFolder> {
143 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
150char AtomicExpandLegacy::ID = 0;
155 "Expand Atomic instructions",
false,
false)
162 const DataLayout &
DL = LI->getModule()->getDataLayout();
163 return DL.getTypeStoreSize(LI->getType());
167 const DataLayout &
DL = SI->getModule()->getDataLayout();
168 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
184template <
typename Inst>
187 Align Alignment =
I->getAlign();
188 return Alignment >=
Size &&
193 const auto *Subtarget =
TM->getSubtargetImpl(
F);
194 if (!Subtarget->enableAtomicExpand())
196 TLI = Subtarget->getTargetLowering();
197 DL = &
F.getParent()->getDataLayout();
204 if (
I.isAtomic() && !isa<FenceInst>(&
I))
207 bool MadeChange =
false;
208 for (
auto *
I : AtomicInsts) {
209 auto LI = dyn_cast<LoadInst>(
I);
210 auto SI = dyn_cast<StoreInst>(
I);
211 auto RMWI = dyn_cast<AtomicRMWInst>(
I);
212 auto CASI = dyn_cast<AtomicCmpXchgInst>(
I);
213 assert((LI || SI || RMWI || CASI) &&
"Unknown atomic instruction");
218 expandAtomicLoadToLibcall(LI);
224 expandAtomicStoreToLibcall(SI);
230 expandAtomicRMWToLibcall(RMWI);
236 expandAtomicCASToLibcall(CASI);
242 if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
243 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
244 I = LI = convertAtomicLoadToIntegerType(LI);
247 TLI->shouldCastAtomicStoreInIR(SI) ==
248 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
249 I =
SI = convertAtomicStoreToIntegerType(SI);
252 TLI->shouldCastAtomicRMWIInIR(RMWI) ==
253 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
254 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
259 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
262 I = CASI = convertCmpXchgToIntegerType(CASI);
267 if (TLI->shouldInsertFencesForAtomic(
I)) {
268 auto FenceOrdering = AtomicOrdering::Monotonic;
270 FenceOrdering = LI->getOrdering();
271 LI->setOrdering(AtomicOrdering::Monotonic);
273 FenceOrdering =
SI->getOrdering();
274 SI->setOrdering(AtomicOrdering::Monotonic);
277 FenceOrdering = RMWI->getOrdering();
278 RMWI->setOrdering(AtomicOrdering::Monotonic);
280 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
281 TargetLoweringBase::AtomicExpansionKind::None &&
289 FenceOrdering = CASI->getMergedOrdering();
290 CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
291 CASI->setFailureOrdering(AtomicOrdering::Monotonic);
294 if (FenceOrdering != AtomicOrdering::Monotonic) {
295 MadeChange |= bracketInstWithFences(
I, FenceOrdering);
297 }
else if (
I->hasAtomicStore() &&
298 TLI->shouldInsertTrailingFenceForAtomicStore(
I)) {
299 auto FenceOrdering = AtomicOrdering::Monotonic;
301 FenceOrdering =
SI->getOrdering();
303 FenceOrdering = RMWI->getOrdering();
304 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
305 TargetLoweringBase::AtomicExpansionKind::LLSC)
307 FenceOrdering = CASI->getSuccessOrdering();
310 if (
auto TrailingFence =
311 TLI->emitTrailingFence(Builder,
I, FenceOrdering)) {
312 TrailingFence->moveAfter(
I);
318 MadeChange |= tryExpandAtomicLoad(LI);
320 MadeChange |= tryExpandAtomicStore(SI);
327 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
330 MadeChange |= tryExpandAtomicRMW(RMWI);
333 MadeChange |= tryExpandAtomicCmpXchg(CASI);
338bool AtomicExpandLegacy::runOnFunction(
Function &
F) {
340 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
345 return AE.run(
F,
TM);
349 return new AtomicExpandLegacy();
356 bool Changed = AE.run(
F,
TM);
363bool AtomicExpandImpl::bracketInstWithFences(
Instruction *
I,
365 ReplacementIRBuilder Builder(
I, *
DL);
367 auto LeadingFence = TLI->emitLeadingFence(Builder,
I, Order);
369 auto TrailingFence = TLI->emitTrailingFence(Builder,
I, Order);
373 TrailingFence->moveAfter(
I);
375 return (LeadingFence || TrailingFence);
381 EVT VT = TLI->getMemValueType(
DL,
T);
392 Type *NewTy = getCorrespondingIntegerType(LI->
getType(),
M->getDataLayout());
394 ReplacementIRBuilder Builder(LI, *
DL);
398 auto *NewLI = Builder.CreateLoad(NewTy,
Addr);
399 NewLI->setAlignment(LI->
getAlign());
402 LLVM_DEBUG(
dbgs() <<
"Replaced " << *LI <<
" with " << *NewLI <<
"\n");
404 Value *NewVal = Builder.CreateBitCast(NewLI, LI->
getType());
411AtomicExpandImpl::convertAtomicXchgToIntegerType(
AtomicRMWInst *RMWI) {
414 getCorrespondingIntegerType(RMWI->
getType(),
M->getDataLayout());
416 ReplacementIRBuilder Builder(RMWI, *
DL);
421 ? Builder.CreatePtrToInt(Val, NewTy)
422 : Builder.CreateBitCast(Val, NewTy);
424 auto *NewRMWI = Builder.CreateAtomicRMW(
427 LLVM_DEBUG(
dbgs() <<
"Replaced " << *RMWI <<
" with " << *NewRMWI <<
"\n");
430 ? Builder.CreateIntToPtr(NewRMWI, RMWI->
getType())
431 : Builder.CreateBitCast(NewRMWI, RMWI->
getType());
437bool AtomicExpandImpl::tryExpandAtomicLoad(
LoadInst *LI) {
438 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
442 expandAtomicOpToLLSC(
448 return expandAtomicLoadToLL(LI);
450 return expandAtomicLoadToCmpXchg(LI);
459bool AtomicExpandImpl::tryExpandAtomicStore(
StoreInst *SI) {
460 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
464 expandAtomicStore(SI);
474bool AtomicExpandImpl::expandAtomicLoadToLL(
LoadInst *LI) {
475 ReplacementIRBuilder Builder(LI, *
DL);
480 Value *Val = TLI->emitLoadLinked(Builder, LI->
getType(),
482 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
490bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(
LoadInst *LI) {
491 ReplacementIRBuilder Builder(LI, *
DL);
500 Value *Pair = Builder.CreateAtomicCmpXchg(
503 Value *Loaded = Builder.CreateExtractValue(Pair, 0,
"loaded");
520 ReplacementIRBuilder Builder(SI, *
DL);
521 auto *
M =
SI->getModule();
522 Type *NewTy = getCorrespondingIntegerType(
SI->getValueOperand()->getType(),
524 Value *NewVal = Builder.CreateBitCast(
SI->getValueOperand(), NewTy);
532 LLVM_DEBUG(
dbgs() <<
"Replaced " << *SI <<
" with " << *NewSI <<
"\n");
533 SI->eraseFromParent();
537void AtomicExpandImpl::expandAtomicStore(
StoreInst *SI) {
544 ReplacementIRBuilder Builder(SI, *
DL);
552 SI->getAlign(), RMWOrdering);
553 SI->eraseFromParent();
556 tryExpandAtomicRMW(AI);
575 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
584bool AtomicExpandImpl::tryExpandAtomicRMW(
AtomicRMWInst *AI) {
591 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
593 if (ValueSize < MinCASSize) {
594 expandPartwordAtomicRMW(AI,
607 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
609 if (ValueSize < MinCASSize) {
610 expandPartwordAtomicRMW(AI,
621 <<
"A compare and swap loop was generated for an atomic "
623 << MemScope <<
" memory scope";
630 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
632 if (ValueSize < MinCASSize) {
637 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
641 expandAtomicRMWToMaskedIntrinsic(AI);
645 TLI->emitBitTestAtomicRMWIntrinsic(AI);
649 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
655 TLI->emitExpandAtomicRMW(AI);
664struct PartwordMaskValues {
666 Type *WordType =
nullptr;
668 Type *IntValueType =
nullptr;
669 Value *AlignedAddr =
nullptr;
670 Align AlignedAddrAlignment;
672 Value *ShiftAmt =
nullptr;
674 Value *Inv_Mask =
nullptr;
679 auto PrintObj = [&
O](
auto *
V) {
686 O <<
"PartwordMaskValues {\n";
688 PrintObj(PMV.WordType);
690 PrintObj(PMV.ValueType);
691 O <<
" AlignedAddr: ";
692 PrintObj(PMV.AlignedAddr);
693 O <<
" AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() <<
'\n';
695 PrintObj(PMV.ShiftAmt);
699 PrintObj(PMV.Inv_Mask);
725 unsigned MinWordSize) {
726 PartwordMaskValues PMV;
731 unsigned ValueSize =
DL.getTypeStoreSize(
ValueType);
733 PMV.ValueType = PMV.IntValueType =
ValueType;
734 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
738 PMV.WordType = MinWordSize > ValueSize ?
Type::getIntNTy(Ctx, MinWordSize * 8)
740 if (PMV.ValueType == PMV.WordType) {
741 PMV.AlignedAddr =
Addr;
742 PMV.AlignedAddrAlignment = AddrAlign;
743 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
744 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0,
true);
748 PMV.AlignedAddrAlignment =
Align(MinWordSize);
750 assert(ValueSize < MinWordSize);
753 IntegerType *IntTy =
DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
756 if (AddrAlign < MinWordSize) {
758 Intrinsic::ptrmask, {PtrTy, IntTy},
759 {
Addr, ConstantInt::get(IntTy, ~(
uint64_t)(MinWordSize - 1))},
nullptr,
763 PtrLSB = Builder.
CreateAnd(AddrInt, MinWordSize - 1,
"PtrLSB");
766 PMV.AlignedAddr =
Addr;
770 if (
DL.isLittleEndian()) {
772 PMV.ShiftAmt = Builder.
CreateShl(PtrLSB, 3);
776 Builder.
CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
779 PMV.ShiftAmt = Builder.
CreateTrunc(PMV.ShiftAmt, PMV.WordType,
"ShiftAmt");
781 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
784 PMV.Inv_Mask = Builder.
CreateNot(PMV.Mask,
"Inv_Mask");
790 const PartwordMaskValues &PMV) {
791 assert(WideWord->
getType() == PMV.WordType &&
"Widened type mismatch");
792 if (PMV.WordType == PMV.ValueType)
801 Value *Updated,
const PartwordMaskValues &PMV) {
802 assert(WideWord->
getType() == PMV.WordType &&
"Widened type mismatch");
803 assert(Updated->
getType() == PMV.ValueType &&
"Value type mismatch");
804 if (PMV.WordType == PMV.ValueType)
811 Builder.
CreateShl(ZExt, PMV.ShiftAmt,
"shifted",
true);
823 const PartwordMaskValues &PMV) {
830 Value *FinalVal = Builder.
CreateOr(Loaded_MaskOut, Shifted_Inc);
844 Value *FinalVal = Builder.
CreateOr(Loaded_MaskOut, NewVal_Masked);
877void AtomicExpandImpl::expandPartwordAtomicRMW(
883 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
889 ReplacementIRBuilder Builder(AI, *
DL);
891 PartwordMaskValues PMV =
893 AI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
895 Value *ValOperand_Shifted =
nullptr;
899 Builder.CreateShl(Builder.CreateZExt(AI->
getValOperand(), PMV.WordType),
900 PMV.ShiftAmt,
"ValOperand_Shifted");
910 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
911 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
915 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
916 PMV.AlignedAddrAlignment, MemOpOrder,
927 ReplacementIRBuilder Builder(AI, *
DL);
932 "Unable to widen operation");
934 PartwordMaskValues PMV =
936 AI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
938 Value *ValOperand_Shifted =
940 PMV.ShiftAmt,
"ValOperand_Shifted");
946 Builder.
CreateOr(PMV.Inv_Mask, ValOperand_Shifted,
"AndOperand");
948 NewOperand = ValOperand_Shifted;
951 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1003 ReplacementIRBuilder Builder(CI, *
DL);
1014 std::prev(BB->
end())->eraseFromParent();
1017 PartwordMaskValues PMV =
1019 CI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1022 Value *NewVal_Shifted =
1024 Value *Cmp_Shifted =
1031 Value *InitLoaded_MaskOut = Builder.
CreateAnd(InitLoaded, PMV.Inv_Mask);
1037 Loaded_MaskOut->
addIncoming(InitLoaded_MaskOut, BB);
1040 Value *FullWord_NewVal = Builder.
CreateOr(Loaded_MaskOut, NewVal_Shifted);
1041 Value *FullWord_Cmp = Builder.
CreateOr(Loaded_MaskOut, Cmp_Shifted);
1043 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1071 Loaded_MaskOut->
addIncoming(OldVal_MaskOut, FailureBB);
1086void AtomicExpandImpl::expandAtomicOpToLLSC(
1090 ReplacementIRBuilder Builder(
I, *
DL);
1091 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType,
Addr, AddrAlign,
1092 MemOpOrder, PerformOp);
1094 I->replaceAllUsesWith(Loaded);
1095 I->eraseFromParent();
1098void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(
AtomicRMWInst *AI) {
1099 ReplacementIRBuilder Builder(AI, *
DL);
1101 PartwordMaskValues PMV =
1103 AI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1111 CastOp = Instruction::SExt;
1115 PMV.ShiftAmt,
"ValOperand_Shifted");
1116 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1117 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1124void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1126 ReplacementIRBuilder Builder(CI, *
DL);
1130 CI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1138 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1139 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1145 CmpVal_Shifted, Builder.
CreateAnd(OldVal, PMV.Mask),
"Success");
1152Value *AtomicExpandImpl::insertRMWLLSCLoop(
1161 F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1162 "Expected at least natural alignment at this point.");
1182 std::prev(BB->
end())->eraseFromParent();
1188 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy,
Addr, MemOpOrder);
1190 Value *NewVal = PerformOp(Builder, Loaded);
1192 Value *StoreSuccess =
1193 TLI->emitStoreConditional(Builder, NewVal,
Addr, MemOpOrder);
1211 M->getDataLayout());
1213 ReplacementIRBuilder Builder(CI, *
DL);
1225 LLVM_DEBUG(
dbgs() <<
"Replaced " << *CI <<
" with " << *NewCI <<
"\n");
1252 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1266 bool HasReleasedLoadBB = !CI->
isWeak() && ShouldInsertFencesForAtomic &&
1273 bool UseUnconditionalReleaseBarrier =
F->hasMinSize() && !CI->
isWeak();
1327 auto ReleasedLoadBB =
1331 auto ReleasingStoreBB =
1335 ReplacementIRBuilder Builder(CI, *
DL);
1340 std::prev(BB->
end())->eraseFromParent();
1342 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1343 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1345 PartwordMaskValues PMV =
1347 CI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1352 Value *UnreleasedLoad =
1353 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1354 Value *UnreleasedLoadExtract =
1361 Builder.
CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1364 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1365 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1370 Builder.
CreatePHI(PMV.WordType, 2,
"loaded.trystore");
1371 LoadedTryStore->
addIncoming(UnreleasedLoad, ReleasingStoreBB);
1372 Value *NewValueInsert =
1374 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1375 PMV.AlignedAddr, MemOpOrder);
1378 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1380 CI->
isWeak() ? FailureBB : RetryBB);
1384 if (HasReleasedLoadBB) {
1386 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1393 Builder.
CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1395 LoadedTryStore->
addIncoming(SecondLoad, ReleasedLoadBB);
1402 if (ShouldInsertFencesForAtomic ||
1403 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1404 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1410 LoadedNoStore->
addIncoming(UnreleasedLoad, StartBB);
1411 if (HasReleasedLoadBB)
1412 LoadedNoStore->
addIncoming(SecondLoad, ReleasedLoadBB);
1417 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1423 LoadedFailure->
addIncoming(LoadedNoStore, NoStoreBB);
1425 LoadedFailure->
addIncoming(LoadedTryStore, TryStoreBB);
1426 if (ShouldInsertFencesForAtomic)
1427 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1437 LoadedExit->
addIncoming(LoadedTryStore, SuccessBB);
1438 LoadedExit->
addIncoming(LoadedFailure, FailureBB);
1445 Value *LoadedFull = LoadedExit;
1459 "weird extraction from { iN, i1 }");
1470 for (
auto *EV : PrunedInsts)
1487bool AtomicExpandImpl::isIdempotentRMW(
AtomicRMWInst *RMWI) {
1500 return C->isMinusOne();
1507bool AtomicExpandImpl::simplifyIdempotentRMW(
AtomicRMWInst *RMWI) {
1508 if (
auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1509 tryExpandAtomicLoad(ResultingLoad);
1515Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1546 std::prev(BB->
end())->eraseFromParent();
1556 Value *NewVal = PerformOp(Builder, Loaded);
1558 Value *NewLoaded =
nullptr;
1561 CreateCmpXchg(Builder,
Addr, Loaded, NewVal, AddrAlign,
1577 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1580 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1584 if (ValueSize < MinCASSize)
1585 return expandPartwordCmpXchg(CI);
1588 return expandAtomicCmpXchg(CI);
1591 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1602 Builder.setIsFPConstrained(
1607 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1611 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1612 AI->getValOperand());
1635 unsigned LargestSize =
DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1636 return Alignment >=
Size &&
1638 Size <= LargestSize;
1641void AtomicExpandImpl::expandAtomicLoadToLibcall(
LoadInst *
I) {
1643 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1644 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1647 bool expanded = expandAtomicOpToLibcall(
1648 I,
Size,
I->getAlign(),
I->getPointerOperand(),
nullptr,
nullptr,
1654void AtomicExpandImpl::expandAtomicStoreToLibcall(
StoreInst *
I) {
1656 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1657 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1660 bool expanded = expandAtomicOpToLibcall(
1661 I,
Size,
I->getAlign(),
I->getPointerOperand(),
I->getValueOperand(),
1669 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1670 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1671 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1674 bool expanded = expandAtomicOpToLibcall(
1675 I,
Size,
I->getAlign(),
I->getPointerOperand(),
I->getNewValOperand(),
1676 I->getCompareOperand(),
I->getSuccessOrdering(),
I->getFailureOrdering(),
1684 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1685 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1686 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1688 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1689 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1690 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1692 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1693 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1694 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1696 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1697 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1698 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1700 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1701 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1702 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1704 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1705 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1706 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1708 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1709 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1710 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1745void AtomicExpandImpl::expandAtomicRMWToLibcall(
AtomicRMWInst *
I) {
1751 if (!Libcalls.
empty())
1752 Success = expandAtomicOpToLibcall(
1753 I,
Size,
I->getAlign(),
I->getPointerOperand(),
I->getValOperand(),
1767 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1773 expandAtomicCASToLibcall(Pair);
1784bool AtomicExpandImpl::expandAtomicOpToLibcall(
1794 IRBuilder<> AllocaBuilder(&
I->getFunction()->getEntryBlock().front());
1799 const Align AllocaAlignment =
DL.getPrefTypeAlign(SizedIntTy);
1816 if (UseSizedLibcall) {
1819 RTLibType = Libcalls[1];
1822 RTLibType = Libcalls[2];
1825 RTLibType = Libcalls[3];
1828 RTLibType = Libcalls[4];
1831 RTLibType = Libcalls[5];
1834 }
else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1835 RTLibType = Libcalls[0];
1842 if (!TLI->getLibcallName(RTLibType)) {
1882 if (!UseSizedLibcall) {
1884 Args.push_back(ConstantInt::get(
DL.getIntPtrType(Ctx),
Size));
1892 Value *PtrVal = PointerOperand;
1894 Args.push_back(PtrVal);
1898 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->
getType());
1902 Args.push_back(AllocaCASExpected);
1907 if (UseSizedLibcall) {
1910 Args.push_back(IntValue);
1912 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->
getType());
1916 Args.push_back(AllocaValue);
1921 if (!CASExpected && HasResult && !UseSizedLibcall) {
1922 AllocaResult = AllocaBuilder.CreateAlloca(
I->getType());
1925 Args.push_back(AllocaResult);
1929 Args.push_back(OrderingVal);
1933 Args.push_back(Ordering2Val);
1939 }
else if (HasResult && UseSizedLibcall)
1940 ResultTy = SizedIntTy;
1946 for (
Value *Arg : Args)
1950 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1952 Call->setAttributes(Attr);
1956 if (ValueOperand && !UseSizedLibcall)
1962 Type *FinalResultTy =
I->getType();
1965 CASExpected->
getType(), AllocaCASExpected, AllocaAlignment);
1969 I->replaceAllUsesWith(V);
1970 }
else if (HasResult) {
1972 if (UseSizedLibcall)
1979 I->replaceAllUsesWith(V);
1981 I->eraseFromParent();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
Expand Atomic static false unsigned getAtomicOpSize(LoadInst *LI)
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, Value *&Success, Value *&NewLoaded)
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
#define LLVM_ATTRIBUTE_UNUSED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
an instruction to allocate memory on the stack
void setAlignment(Align Align)
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
An instruction that atomically checks whether a specified value is in a memory location,...
Value * getNewValOperand()
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
void setWeak(bool IsWeak)
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
Value * getCompareOperand()
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Value * getPointerOperand()
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Min
*p = old <signed v ? old : v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
AttributeList addRetAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add a return value attribute to the list.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Common base class shared among various IRBuilders.
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
UnreachableInst * CreateUnreachable()
CallInst * CreateLifetimeStart(Value *Ptr, ConstantInt *Size=nullptr)
Create a lifetime.start intrinsic.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
BasicBlock::iterator GetInsertPoint() const
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateNot(Value *V, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
void setIsFPConstrained(bool IsCon)
Enable/Disable use of constrained floating point math.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
LLVMContext & getContext() const
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
CallInst * CreateLifetimeEnd(Value *Ptr, ConstantInt *Size=nullptr)
Create a lifetime.end intrinsic.
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
const BasicBlock * getParent() const
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application startup.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address space zero).
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
void initializeAtomicExpandLegacyPass(PassRegistry &)
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg)
Expand an atomic RMW instruction into a loop utilizing cmpxchg.
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls, or target-specific instructions which implement the same semantics in a way that better fits the target backend.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
This struct is a compact representation of a valid (non-zero power of two) alignment.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.