69 #include "llvm/IR/IntrinsicsPowerPC.h"
102 using namespace llvm;
104 #define DEBUG_TYPE "ppc-lowering"
110 cl::desc(
"disable setting the node scheduling preference to ILP on PPC"),
cl::Hidden);
125 "ppc-quadword-atomics",
131 cl::desc(
"disable vector permute decomposition"),
134 STATISTIC(NumTailCalls,
"Number of tail calls");
135 STATISTIC(NumSiblingCalls,
"Number of sibling calls");
136 STATISTIC(ShufflesHandledWithVPERM,
"Number of shuffles lowered to a VPERM");
137 STATISTIC(NumDynamicAllocaProbed,
"Number of dynamic stack allocation probed");
154 initializeAddrModeMap();
157 bool isPPC64 = Subtarget.
isPPC64();
221 if (!Subtarget.
hasSPE()) {
230 for (
MVT VT : ScalarIntVTs) {
240 if (isPPC64 || Subtarget.
hasFPCVT()) {
349 if (!Subtarget.
hasSPE()) {
391 TM.Options.PPCGenScalarMASSEntries) {
539 if (
TM.Options.UnsafeFPMath) {
765 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
994 if (
TM.Options.UnsafeFPMath) {
1231 }
else if (Subtarget.
hasVSX()) {
1304 if (Subtarget.
hasMMA()) {
1502 void PPCTargetLowering::initializeAddrModeMap() {
1553 if (MaxAlign == MaxMaxAlign)
1555 if (
VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1556 if (MaxMaxAlign >= 32 &&
1557 VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1558 MaxAlign =
Align(32);
1559 else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1561 MaxAlign =
Align(16);
1562 }
else if (
ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1565 if (EltAlign > MaxAlign)
1566 MaxAlign = EltAlign;
1567 }
else if (
StructType *STy = dyn_cast<StructType>(Ty)) {
1568 for (
auto *EltTy : STy->elements()) {
1571 if (EltAlign > MaxAlign)
1572 MaxAlign = EltAlign;
1573 if (MaxAlign == MaxMaxAlign)
1588 return Alignment.value();
1596 return Subtarget.
hasSPE();
1618 return "PPCISD::FP_TO_UINT_IN_VSR"; // spelled like the sibling entries: bare qualified name, no punctuation
1620 return "PPCISD::FP_TO_SINT_IN_VSR";
1624 return "PPCISD::FTSQRT";
1626 return "PPCISD::FSQRT";
1631 return "PPCISD::XXSPLTI_SP_TO_DP";
1633 return "PPCISD::XXSPLTI32DX";
1655 return "PPCISD::CALL_RM";
1657 return "PPCISD::CALL_NOP_RM";
1659 return "PPCISD::CALL_NOTOC_RM";
1664 return "PPCISD::BCTRL_RM";
1666 return "PPCISD::BCTRL_LOAD_TOC_RM";
1678 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1680 return "PPCISD::ANDI_rec_1_EQ_BIT";
1682 return "PPCISD::ANDI_rec_1_GT_BIT";
1697 return "PPCISD::ST_VSR_SCAL_INT";
1723 return "PPCISD::PADDI_DTPREL";
1740 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1742 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1752 return "PPCISD::STRICT_FADDRTZ";
1754 return "PPCISD::STRICT_FCTIDZ";
1756 return "PPCISD::STRICT_FCTIWZ";
1758 return "PPCISD::STRICT_FCTIDUZ";
1760 return "PPCISD::STRICT_FCTIWUZ";
1762 return "PPCISD::STRICT_FCFID";
1764 return "PPCISD::STRICT_FCFIDU";
1766 return "PPCISD::STRICT_FCFIDS";
1768 return "PPCISD::STRICT_FCFIDUS";
1794 return CFP->getValueAPF().isZero();
1798 if (
const ConstantFP *CFP = dyn_cast<ConstantFP>(
CP->getConstVal()))
1799 return CFP->getValueAPF().isZero();
1807 return Op < 0 ||
Op == Val;
1819 if (ShuffleKind == 0) {
1822 for (
unsigned i = 0;
i != 16; ++
i)
1825 }
else if (ShuffleKind == 2) {
1828 for (
unsigned i = 0;
i != 16; ++
i)
1831 }
else if (ShuffleKind == 1) {
1832 unsigned j = IsLE ? 0 : 1;
1833 for (
unsigned i = 0;
i != 8; ++
i)
1850 if (ShuffleKind == 0) {
1853 for (
unsigned i = 0;
i != 16;
i += 2)
1857 }
else if (ShuffleKind == 2) {
1860 for (
unsigned i = 0;
i != 16;
i += 2)
1864 }
else if (ShuffleKind == 1) {
1865 unsigned j = IsLE ? 0 : 2;
1866 for (
unsigned i = 0;
i != 8;
i += 2)
1891 if (ShuffleKind == 0) {
1894 for (
unsigned i = 0;
i != 16;
i += 4)
1900 }
else if (ShuffleKind == 2) {
1903 for (
unsigned i = 0;
i != 16;
i += 4)
1909 }
else if (ShuffleKind == 1) {
1910 unsigned j = IsLE ? 0 : 4;
1911 for (
unsigned i = 0;
i != 8;
i += 4)
1928 unsigned LHSStart,
unsigned RHSStart) {
1931 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1932 "Unsupported merge size!");
1934 for (
unsigned i = 0;
i != 8/UnitSize; ++
i)
1935 for (
unsigned j = 0;
j != UnitSize; ++
j) {
1937 LHSStart+
j+
i*UnitSize) ||
1939 RHSStart+
j+
i*UnitSize))
1954 if (ShuffleKind == 1)
1956 else if (ShuffleKind == 2)
1961 if (ShuffleKind == 1)
1963 else if (ShuffleKind == 0)
1979 if (ShuffleKind == 1)
1981 else if (ShuffleKind == 2)
1986 if (ShuffleKind == 1)
1988 else if (ShuffleKind == 0)
2038 unsigned RHSStartValue) {
2042 for (
unsigned i = 0;
i < 2; ++
i)
2043 for (
unsigned j = 0;
j < 4; ++
j)
2045 i*RHSStartValue+
j+IndexOffset) ||
2047 i*RHSStartValue+
j+IndexOffset+8))
2069 unsigned indexOffset = CheckEven ? 4 : 0;
2070 if (ShuffleKind == 1)
2072 else if (ShuffleKind == 2)
2078 unsigned indexOffset = CheckEven ? 0 : 4;
2079 if (ShuffleKind == 1)
2081 else if (ShuffleKind == 0)
2107 if (
i == 16)
return -1;
2112 if (ShiftAmt <
i)
return -1;
2117 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2119 for (++
i;
i != 16; ++
i)
2122 }
else if (ShuffleKind == 1) {
2124 for (++
i;
i != 16; ++
i)
2131 ShiftAmt = 16 - ShiftAmt;
2140 EVT VT =
N->getValueType(0);
2142 return EltSize == 8 &&
N->getMaskElt(0) ==
N->getMaskElt(1);
2145 EltSize <= 8 &&
"Can only handle 1,2,4,8 byte element sizes");
2149 if (
N->getMaskElt(0) % EltSize != 0)
2154 unsigned ElementBase =
N->getMaskElt(0);
2157 if (ElementBase >= 16)
2162 for (
unsigned i = 1;
i != EltSize; ++
i)
2163 if (
N->getMaskElt(
i) < 0 ||
N->getMaskElt(
i) != (
int)(
i+ElementBase))
2166 for (
unsigned i = EltSize,
e = 16;
i !=
e;
i += EltSize) {
2167 if (
N->getMaskElt(
i) < 0)
continue;
2168 for (
unsigned j = 0;
j != EltSize; ++
j)
2169 if (
N->getMaskElt(
i+
j) !=
N->getMaskElt(
j))
2187 "Unexpected element width.");
2188 assert((StepLen == 1 || StepLen == -1) && // only a unit step, ascending or descending, is accepted
"Unexpected step length.");
2190 unsigned NumOfElem = 16 /
Width;
2191 unsigned MaskVal[16];
2192 for (
unsigned i = 0;
i < NumOfElem; ++
i) {
2193 MaskVal[0] =
N->getMaskElt(
i *
Width);
2194 if ((StepLen == 1) && (MaskVal[0] %
Width)) {
2196 }
else if ((StepLen == -1) && ((MaskVal[0] + 1) %
Width)) {
2200 for (
unsigned int j = 1;
j <
Width; ++
j) {
2201 MaskVal[
j] =
N->getMaskElt(
i *
Width +
j);
2202 if (MaskVal[
j] != MaskVal[
j-1] + StepLen) {
2212 unsigned &InsertAtByte,
bool &Swap,
bool IsLE) {
2217 unsigned M0 =
N->getMaskElt(0) / 4;
2218 unsigned M1 =
N->getMaskElt(4) / 4;
2219 unsigned M2 =
N->getMaskElt(8) / 4;
2220 unsigned M3 =
N->getMaskElt(12) / 4;
2221 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2222 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2227 if ((
M0 > 3 &&
M1 == 1 && M2 == 2 && M3 == 3) ||
2228 (
M0 < 4 &&
M1 == 5 && M2 == 6 && M3 == 7)) {
2229 ShiftElts = IsLE ? LittleEndianShifts[
M0 & 0x3] : BigEndianShifts[
M0 & 0x3];
2230 InsertAtByte = IsLE ? 12 : 0;
2235 if ((
M1 > 3 &&
M0 == 0 && M2 == 2 && M3 == 3) ||
2236 (
M1 < 4 &&
M0 == 4 && M2 == 6 && M3 == 7)) {
2237 ShiftElts = IsLE ? LittleEndianShifts[
M1 & 0x3] : BigEndianShifts[
M1 & 0x3];
2238 InsertAtByte = IsLE ? 8 : 4;
2243 if ((M2 > 3 &&
M0 == 0 &&
M1 == 1 && M3 == 3) ||
2244 (M2 < 4 &&
M0 == 4 &&
M1 == 5 && M3 == 7)) {
2245 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2246 InsertAtByte = IsLE ? 4 : 8;
2251 if ((M3 > 3 &&
M0 == 0 &&
M1 == 1 && M2 == 2) ||
2252 (M3 < 4 &&
M0 == 4 &&
M1 == 5 && M2 == 6)) {
2253 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2254 InsertAtByte = IsLE ? 0 : 12;
2261 if (
N->getOperand(1).isUndef()) {
2264 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2265 if (
M0 == XXINSERTWSrcElem &&
M1 == 1 && M2 == 2 && M3 == 3) {
2266 InsertAtByte = IsLE ? 12 : 0;
2269 if (
M0 == 0 &&
M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2270 InsertAtByte = IsLE ? 8 : 4;
2273 if (
M0 == 0 &&
M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2274 InsertAtByte = IsLE ? 4 : 8;
2277 if (
M0 == 0 &&
M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2278 InsertAtByte = IsLE ? 0 : 12;
2287 bool &Swap,
bool IsLE) {
2294 unsigned M0 =
N->getMaskElt(0) / 4;
2295 unsigned M1 =
N->getMaskElt(4) / 4;
2296 unsigned M2 =
N->getMaskElt(8) / 4;
2297 unsigned M3 =
N->getMaskElt(12) / 4;
2301 if (
N->getOperand(1).isUndef()) {
2302 assert(
M0 < 4 &&
"Indexing into an undef vector?");
2303 if (
M1 != (
M0 + 1) % 4 || M2 != (
M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2306 ShiftElts = IsLE ? (4 -
M0) % 4 :
M0;
2312 if (
M1 != (
M0 + 1) % 8 || M2 != (
M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2316 if (
M0 == 0 ||
M0 == 7 ||
M0 == 6 ||
M0 == 5) {
2321 ShiftElts = (8 -
M0) % 8;
2322 }
else if (
M0 == 4 ||
M0 == 3 ||
M0 == 2 ||
M0 == 1) {
2327 ShiftElts = (4 -
M0) % 4;
2332 if (
M0 == 0 ||
M0 == 1 ||
M0 == 2 ||
M0 == 3) {
2337 }
else if (
M0 == 4 ||
M0 == 5 ||
M0 == 6 ||
M0 == 7) {
2354 for (
int i = 0;
i < 16;
i +=
Width)
2355 if (
N->getMaskElt(
i) !=
i +
Width - 1)
2386 bool &Swap,
bool IsLE) {
2393 unsigned M0 =
N->getMaskElt(0) / 8;
2394 unsigned M1 =
N->getMaskElt(8) / 8;
2395 assert(((
M0 |
M1) < 4) &&
"A mask element out of bounds?");
2399 if (
N->getOperand(1).isUndef()) {
2400 if ((
M0 |
M1) < 2) {
2401 DM = IsLE ? (((~
M1) & 1) << 1) + ((~
M0) & 1) : (
M0 << 1) + (
M1 & 1);
2409 if (
M0 > 1 &&
M1 < 2) {
2411 }
else if (M0 < 2 && M1 > 1) {
2419 DM = (((~
M1) & 1) << 1) + ((~
M0) & 1);
2422 if (M0 < 2 && M1 > 1) {
2424 }
else if (
M0 > 1 &&
M1 < 2) {
2432 DM = (
M0 << 1) + (
M1 & 1);
2452 return (16 / EltSize) - 1 - (SVOp->
getMaskElt(0) / EltSize);
2468 unsigned EltSize = 16/
N->getNumOperands();
2469 if (EltSize < ByteSize) {
2470 unsigned Multiple = ByteSize/EltSize;
2472 assert(Multiple > 1 && Multiple <= 4 &&
"How can this happen?");
2475 for (
unsigned i = 0,
e =
N->getNumOperands();
i !=
e; ++
i) {
2476 if (
N->getOperand(
i).isUndef())
continue;
2478 if (!isa<ConstantSDNode>(
N->getOperand(
i)))
return SDValue();
2480 if (!UniquedVals[
i&(Multiple-1)].getNode())
2481 UniquedVals[
i&(Multiple-1)] =
N->getOperand(
i);
2482 else if (UniquedVals[
i&(Multiple-1)] !=
N->getOperand(
i))
2492 bool LeadingZero =
true;
2493 bool LeadingOnes =
true;
2494 for (
unsigned i = 0;
i != Multiple-1; ++
i) {
2495 if (!UniquedVals[
i].getNode())
continue;
2502 if (!UniquedVals[Multiple-1].getNode())
2504 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2509 if (!UniquedVals[Multiple-1].getNode())
2511 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2520 for (
unsigned i = 0,
e =
N->getNumOperands();
i !=
e; ++
i) {
2521 if (
N->getOperand(
i).isUndef())
continue;
2523 OpVal =
N->getOperand(
i);
2524 else if (OpVal !=
N->getOperand(
i))
2530 unsigned ValSizeInBytes = EltSize;
2533 Value = CN->getZExtValue();
2535 assert(CN->getValueType(0) ==
MVT::f32 &&
"Only one legal FP vector type!");
2542 if (ValSizeInBytes < ByteSize)
return SDValue();
2553 if (MaskVal == 0)
return SDValue();
2556 if (SignExtend32<5>(MaskVal) == MaskVal)
2570 if (!isa<ConstantSDNode>(
N))
2573 Imm = (int16_t)cast<ConstantSDNode>(
N)->getZExtValue();
2575 return Imm == (int32_t)cast<ConstantSDNode>(
N)->getZExtValue();
2577 return Imm == (int64_t)cast<ConstantSDNode>(
N)->getZExtValue();
2595 return (~(LHSKnown.
Zero | RHSKnown.
Zero) == 0);
2604 if (
MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2605 if (Memop->getMemoryVT() ==
MVT::f64) {
2606 Base =
N.getOperand(0);
2607 Index =
N.getOperand(1);
2619 if (!isa<ConstantSDNode>(
N))
2622 Imm = (int64_t)cast<ConstantSDNode>(
N)->getZExtValue();
2623 return isInt<34>(
Imm);
2650 (!EncodingAlignment ||
isAligned(*EncodingAlignment,
Imm)))
2655 Base =
N.getOperand(0);
2656 Index =
N.getOperand(1);
2658 }
else if (
N.getOpcode() ==
ISD::OR) {
2660 (!EncodingAlignment ||
isAligned(*EncodingAlignment,
Imm)))
2672 if (~(LHSKnown.
Zero | RHSKnown.
Zero) == 0) {
2673 Base =
N.getOperand(0);
2674 Index =
N.getOperand(1);
2744 (!EncodingAlignment ||
isAligned(*EncodingAlignment, imm))) {
2750 Base =
N.getOperand(0);
2753 }
else if (
N.getOperand(1).getOpcode() ==
PPCISD::Lo) {
2755 assert(!cast<ConstantSDNode>(
N.getOperand(1).getOperand(1))->getZExtValue()
2756 &&
"Cannot handle constant offsets yet!");
2757 Disp =
N.getOperand(1).getOperand(0);
2762 Base =
N.getOperand(0);
2765 }
else if (
N.getOpcode() ==
ISD::OR) {
2768 (!EncodingAlignment ||
isAligned(*EncodingAlignment, imm))) {
2778 dyn_cast<FrameIndexSDNode>(
N.getOperand(0))) {
2782 Base =
N.getOperand(0);
2795 (!EncodingAlignment ||
isAligned(*EncodingAlignment,
Imm))) {
2798 CN->getValueType(0));
2803 if ((CN->getValueType(0) ==
MVT::i32 ||
2804 (int64_t)CN->getZExtValue() == (
int)CN->getZExtValue()) &&
2805 (!EncodingAlignment ||
2806 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2807 int Addr = (
int)CN->getZExtValue();
2848 Base =
N.getOperand(0);
2864 Base =
N.getOperand(0);
2897 !
N.getOperand(1).hasOneUse() || !
N.getOperand(0).hasOneUse())) {
2898 Base =
N.getOperand(0);
2899 Index =
N.getOperand(1);
2911 Ty *PCRelCand = dyn_cast<Ty>(
N);
2923 if (isValidPCRelNode<ConstantPoolSDNode>(
N) ||
2924 isValidPCRelNode<GlobalAddressSDNode>(
N) ||
2925 isValidPCRelNode<JumpTableSDNode>(
N) ||
2926 isValidPCRelNode<BlockAddressSDNode>(
N))
2942 EVT MemVT =
LD->getMemoryVT();
2949 if (!
ST.hasP8Vector())
2954 if (!
ST.hasP9Vector())
2967 if (UI.getUse().get().getResNo() == 0 &&
2989 Ptr =
LD->getBasePtr();
2990 VT =
LD->getMemoryVT();
2991 Alignment =
LD->getAlignment();
2993 Ptr =
ST->getBasePtr();
2994 VT =
ST->getMemoryVT();
2995 Alignment =
ST->getAlignment();
3018 if (isa<FrameIndexSDNode>(
Base) || isa<RegisterSDNode>(
Base))
3021 SDValue Val = cast<StoreSDNode>(
N)->getValue();
3051 isa<ConstantSDNode>(Offset))
3066 unsigned &HiOpFlags,
unsigned &LoOpFlags,
3108 const bool Is64Bit = Subtarget.
isPPC64();
3123 EVT PtrVT =
Op.getValueType();
3139 return getTOCEntry(DAG,
SDLoc(
CP), GA);
3142 unsigned MOHiFlag, MOLoFlag;
3149 return getTOCEntry(DAG,
SDLoc(
CP), GA);
3209 EVT PtrVT =
Op.getValueType();
3227 return getTOCEntry(DAG,
SDLoc(
JT), GA);
3230 unsigned MOHiFlag, MOLoFlag;
3237 return getTOCEntry(DAG,
SDLoc(GA), GA);
3247 EVT PtrVT =
Op.getValueType();
3266 return getTOCEntry(DAG,
SDLoc(BASDN), GA);
3275 unsigned MOHiFlag, MOLoFlag;
3286 return LowerGlobalTLSAddressAIX(
Op, DAG);
3288 return LowerGlobalTLSAddressLinux(
Op, DAG);
3312 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3313 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3331 bool is64bit = Subtarget.
isPPC64();
3379 if (!
TM.isPositionIndependent())
3438 PtrVT, GOTPtr, TGA, TGA);
3440 PtrVT, TLSAddr, TGA);
3449 EVT PtrVT =
Op.getValueType();