68#include "llvm/IR/IntrinsicsPowerPC.h"
104#define DEBUG_TYPE "ppc-lowering"
125 "ppc-quadword-atomics",
131 cl::desc(
"disable vector permute decomposition"),
135 "disable-auto-paired-vec-st",
136 cl::desc(
"disable automatically generated 32byte paired vector stores"),
142 "Number of shuffles lowered to a VPERM or XXPERM");
143STATISTIC(NumDynamicAllocaProbed,
"Number of dynamic stack allocation probed");
160 initializeAddrModeMap();
163 bool isPPC64 = Subtarget.
isPPC64();
172 if (!Subtarget.hasEFPU2())
197 if (Subtarget.isISA3_0()) {
227 if (!Subtarget.hasSPE()) {
236 for (
MVT VT : ScalarIntVTs) {
243 if (Subtarget.useCRBits()) {
246 if (isPPC64 || Subtarget.hasFPCVT()) {
320 if (Subtarget.isISA3_0()) {
355 if (!Subtarget.hasSPE()) {
360 if (Subtarget.hasVSX()) {
365 if (Subtarget.hasFSQRT()) {
370 if (Subtarget.hasFPRND()) {
411 if (Subtarget.hasSPE()) {
419 if (Subtarget.hasSPE())
425 if (!Subtarget.hasFSQRT() &&
426 !(
TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
430 if (!Subtarget.hasFSQRT() &&
431 !(
TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
432 Subtarget.hasFRES()))
435 if (Subtarget.hasFCPSGN()) {
443 if (Subtarget.hasFPRND()) {
457 if (Subtarget.isISA3_1()) {
468 if (Subtarget.isISA3_0()) {
488 if (!Subtarget.useCRBits()) {
501 if (!Subtarget.useCRBits())
504 if (Subtarget.hasFPU()) {
515 if (!Subtarget.useCRBits())
520 if (Subtarget.hasSPE()) {
544 if (Subtarget.hasDirectMove() && isPPC64) {
549 if (
TM.Options.UnsafeFPMath) {
652 if (Subtarget.hasSPE()) {
674 if (Subtarget.has64BitSupport()) {
689 if (Subtarget.hasLFIWAX() || Subtarget.
isPPC64()) {
695 if (Subtarget.hasSPE()) {
705 if (Subtarget.hasFPCVT()) {
706 if (Subtarget.has64BitSupport()) {
727 if (Subtarget.use64BitRegs()) {
745 if (Subtarget.has64BitSupport()) {
752 if (Subtarget.hasVSX()) {
759 if (Subtarget.hasAltivec()) {
775 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
788 if (Subtarget.hasVSX()) {
794 if (Subtarget.hasP8Altivec() && (VT.SimpleTy !=
MVT::v1i128)) {
804 if (Subtarget.hasP9Altivec() && (VT.SimpleTy !=
MVT::v1i128))
877 if (!Subtarget.hasP8Vector()) {
919 if (Subtarget.hasAltivec())
923 if (Subtarget.hasP8Altivec())
934 if (Subtarget.hasVSX()) {
940 if (Subtarget.hasP8Altivec())
945 if (Subtarget.isISA3_1()) {
983 if (Subtarget.hasVSX()) {
986 if (Subtarget.hasP8Vector()) {
990 if (Subtarget.hasDirectMove() && isPPC64) {
1004 if (
TM.Options.UnsafeFPMath) {
1041 if (Subtarget.hasP8Vector())
1050 if (Subtarget.hasP8Altivec()) {
1077 if (Subtarget.isISA3_1())
1179 if (Subtarget.hasP8Altivec()) {
1184 if (Subtarget.hasP9Vector()) {
1241 }
else if (Subtarget.hasVSX()) {
1285 if (Subtarget.hasP9Altivec()) {
1286 if (Subtarget.isISA3_1()) {
1304 if (Subtarget.hasP10Vector()) {
1309 if (Subtarget.pairedVectorMemops()) {
1314 if (Subtarget.hasMMA()) {
1315 if (Subtarget.isISAFuture())
1324 if (Subtarget.has64BitSupport())
1327 if (Subtarget.isISA3_1())
1345 if (Subtarget.hasAltivec()) {
1372 if (Subtarget.hasFPCVT())
1375 if (Subtarget.useCRBits())
1384 if (Subtarget.useCRBits()) {
1388 if (Subtarget.hasP9Altivec()) {
1418 if (Subtarget.useCRBits()) {
1515void PPCTargetLowering::initializeAddrModeMap() {
1566 if (MaxAlign == MaxMaxAlign)
1568 if (
VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1569 if (MaxMaxAlign >= 32 &&
1570 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1571 MaxAlign =
Align(32);
1572 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1574 MaxAlign =
Align(16);
1575 }
else if (
ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1578 if (EltAlign > MaxAlign)
1579 MaxAlign = EltAlign;
1580 }
else if (
StructType *STy = dyn_cast<StructType>(Ty)) {
1581 for (
auto *EltTy : STy->elements()) {
1584 if (EltAlign > MaxAlign)
1585 MaxAlign = EltAlign;
1586 if (MaxAlign == MaxMaxAlign)
1599 if (Subtarget.hasAltivec())
1601 return Alignment.
value();
1609 return Subtarget.hasSPE();
1631 return "PPCISD::FP_TO_UINT_IN_VSR,";
1633 return "PPCISD::FP_TO_SINT_IN_VSR";
1637 return "PPCISD::FTSQRT";
1639 return "PPCISD::FSQRT";
1644 return "PPCISD::XXSPLTI_SP_TO_DP";
1646 return "PPCISD::XXSPLTI32DX";
1650 return "PPCISD::XXPERM";
1670 return "PPCISD::CALL_RM";
1672 return "PPCISD::CALL_NOP_RM";
1674 return "PPCISD::CALL_NOTOC_RM";
1679 return "PPCISD::BCTRL_RM";
1681 return "PPCISD::BCTRL_LOAD_TOC_RM";
1693 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1695 return "PPCISD::ANDI_rec_1_EQ_BIT";
1697 return "PPCISD::ANDI_rec_1_GT_BIT";
1712 return "PPCISD::ST_VSR_SCAL_INT";
1738 return "PPCISD::PADDI_DTPREL";
1755 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1757 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1767 return "PPCISD::STRICT_FADDRTZ";
1769 return "PPCISD::STRICT_FCTIDZ";
1771 return "PPCISD::STRICT_FCTIWZ";
1773 return "PPCISD::STRICT_FCTIDUZ";
1775 return "PPCISD::STRICT_FCTIWUZ";
1777 return "PPCISD::STRICT_FCFID";
1779 return "PPCISD::STRICT_FCFIDU";
1781 return "PPCISD::STRICT_FCFIDS";
1783 return "PPCISD::STRICT_FCFIDUS";
1786 return "PPCISD::STORE_COND";
1811 return CFP->getValueAPF().isZero();
1815 if (
const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1816 return CFP->getValueAPF().isZero();
1824 return Op < 0 || Op == Val;
1836 if (ShuffleKind == 0) {
1839 for (
unsigned i = 0; i != 16; ++i)
1842 }
else if (ShuffleKind == 2) {
1845 for (
unsigned i = 0; i != 16; ++i)
1848 }
else if (ShuffleKind == 1) {
1849 unsigned j = IsLE ? 0 : 1;
1850 for (
unsigned i = 0; i != 8; ++i)
1867 if (ShuffleKind == 0) {
1870 for (
unsigned i = 0; i != 16; i += 2)
1874 }
else if (ShuffleKind == 2) {
1877 for (
unsigned i = 0; i != 16; i += 2)
1881 }
else if (ShuffleKind == 1) {
1882 unsigned j = IsLE ? 0 : 2;
1883 for (
unsigned i = 0; i != 8; i += 2)
1904 if (!Subtarget.hasP8Vector())
1908 if (ShuffleKind == 0) {
1911 for (
unsigned i = 0; i != 16; i += 4)
1917 }
else if (ShuffleKind == 2) {
1920 for (
unsigned i = 0; i != 16; i += 4)
1926 }
else if (ShuffleKind == 1) {
1927 unsigned j = IsLE ? 0 : 4;
1928 for (
unsigned i = 0; i != 8; i += 4)
1945 unsigned LHSStart,
unsigned RHSStart) {
1948 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1949 "Unsupported merge size!");
1951 for (
unsigned i = 0; i != 8/UnitSize; ++i)
1952 for (
unsigned j = 0; j != UnitSize; ++j) {
1954 LHSStart+j+i*UnitSize) ||
1956 RHSStart+j+i*UnitSize))
1971 if (ShuffleKind == 1)
1973 else if (ShuffleKind == 2)
1978 if (ShuffleKind == 1)
1980 else if (ShuffleKind == 0)
1996 if (ShuffleKind == 1)
1998 else if (ShuffleKind == 2)
2003 if (ShuffleKind == 1)
2005 else if (ShuffleKind == 0)
2055 unsigned RHSStartValue) {
2059 for (
unsigned i = 0; i < 2; ++i)
2060 for (
unsigned j = 0; j < 4; ++j)
2062 i*RHSStartValue+j+IndexOffset) ||
2064 i*RHSStartValue+j+IndexOffset+8))
2086 unsigned indexOffset = CheckEven ? 4 : 0;
2087 if (ShuffleKind == 1)
2089 else if (ShuffleKind == 2)
2095 unsigned indexOffset = CheckEven ? 0 : 4;
2096 if (ShuffleKind == 1)
2098 else if (ShuffleKind == 0)
2121 for (i = 0; i != 16 && SVOp->
getMaskElt(i) < 0; ++i)
2124 if (i == 16)
return -1;
2129 if (ShiftAmt < i)
return -1;
2134 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2136 for (++i; i != 16; ++i)
2139 }
else if (ShuffleKind == 1) {
2141 for (++i; i != 16; ++i)
2148 ShiftAmt = 16 - ShiftAmt;
2157 EVT VT =
N->getValueType(0);
2159 return EltSize == 8 &&
N->getMaskElt(0) ==
N->getMaskElt(1);
2162 EltSize <= 8 &&
"Can only handle 1,2,4,8 byte element sizes");
2166 if (
N->getMaskElt(0) % EltSize != 0)
2171 unsigned ElementBase =
N->getMaskElt(0);
2174 if (ElementBase >= 16)
2179 for (
unsigned i = 1; i != EltSize; ++i)
2180 if (
N->getMaskElt(i) < 0 ||
N->getMaskElt(i) != (
int)(i+ElementBase))
2183 for (
unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2184 if (
N->getMaskElt(i) < 0)
continue;
2185 for (
unsigned j = 0; j != EltSize; ++j)
2186 if (
N->getMaskElt(i+j) !=
N->getMaskElt(j))
2203 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2204 "Unexpected element width.");
2205 assert((StepLen == 1 || StepLen == -1) &&
"Unexpected element width.");
2207 unsigned NumOfElem = 16 / Width;
2208 unsigned MaskVal[16];
2209 for (
unsigned i = 0; i < NumOfElem; ++i) {
2210 MaskVal[0] =
N->getMaskElt(i * Width);
2211 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2213 }
else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2217 for (
unsigned int j = 1; j < Width; ++j) {
2218 MaskVal[j] =
N->getMaskElt(i * Width + j);
2219 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2229 unsigned &InsertAtByte,
bool &Swap,
bool IsLE) {
2234 unsigned M0 =
N->getMaskElt(0) / 4;
2235 unsigned M1 =
N->getMaskElt(4) / 4;
2236 unsigned M2 =
N->getMaskElt(8) / 4;
2237 unsigned M3 =
N->getMaskElt(12) / 4;
2238 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2239 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2244 if ((
M0 > 3 &&
M1 == 1 && M2 == 2 && M3 == 3) ||
2245 (
M0 < 4 &&
M1 == 5 && M2 == 6 && M3 == 7)) {
2246 ShiftElts = IsLE ? LittleEndianShifts[
M0 & 0x3] : BigEndianShifts[
M0 & 0x3];
2247 InsertAtByte = IsLE ? 12 : 0;
2252 if ((
M1 > 3 &&
M0 == 0 && M2 == 2 && M3 == 3) ||
2253 (
M1 < 4 &&
M0 == 4 && M2 == 6 && M3 == 7)) {
2254 ShiftElts = IsLE ? LittleEndianShifts[
M1 & 0x3] : BigEndianShifts[
M1 & 0x3];
2255 InsertAtByte = IsLE ? 8 : 4;
2260 if ((M2 > 3 &&
M0 == 0 &&
M1 == 1 && M3 == 3) ||
2261 (M2 < 4 &&
M0 == 4 &&
M1 == 5 && M3 == 7)) {
2262 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2263 InsertAtByte = IsLE ? 4 : 8;
2268 if ((M3 > 3 &&
M0 == 0 &&
M1 == 1 && M2 == 2) ||
2269 (M3 < 4 &&
M0 == 4 &&
M1 == 5 && M2 == 6)) {
2270 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2271 InsertAtByte = IsLE ? 0 : 12;
2278 if (
N->getOperand(1).isUndef()) {
2281 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2282 if (
M0 == XXINSERTWSrcElem &&
M1 == 1 && M2 == 2 && M3 == 3) {
2283 InsertAtByte = IsLE ? 12 : 0;
2286 if (
M0 == 0 &&
M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2287 InsertAtByte = IsLE ? 8 : 4;
2290 if (
M0 == 0 &&
M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2291 InsertAtByte = IsLE ? 4 : 8;
2294 if (
M0 == 0 &&
M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2295 InsertAtByte = IsLE ? 0 : 12;
2304 bool &Swap,
bool IsLE) {
2311 unsigned M0 =
N->getMaskElt(0) / 4;
2312 unsigned M1 =
N->getMaskElt(4) / 4;
2313 unsigned M2 =
N->getMaskElt(8) / 4;
2314 unsigned M3 =
N->getMaskElt(12) / 4;
2318 if (
N->getOperand(1).isUndef()) {
2319 assert(
M0 < 4 &&
"Indexing into an undef vector?");
2320 if (
M1 != (
M0 + 1) % 4 || M2 != (
M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2323 ShiftElts = IsLE ? (4 -
M0) % 4 :
M0;
2329 if (
M1 != (
M0 + 1) % 8 || M2 != (
M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2333 if (
M0 == 0 ||
M0 == 7 ||
M0 == 6 ||
M0 == 5) {
2338 ShiftElts = (8 -
M0) % 8;
2339 }
else if (
M0 == 4 ||
M0 == 3 ||
M0 == 2 ||
M0 == 1) {
2344 ShiftElts = (4 -
M0) % 4;
2349 if (
M0 == 0 ||
M0 == 1 ||
M0 == 2 ||
M0 == 3) {
2354 }
else if (
M0 == 4 ||
M0 == 5 ||
M0 == 6 ||
M0 == 7) {
2371 for (
int i = 0; i < 16; i += Width)
2372 if (
N->getMaskElt(i) != i + Width - 1)
2403 bool &Swap,
bool IsLE) {
2410 unsigned M0 =
N->getMaskElt(0) / 8;
2411 unsigned M1 =
N->getMaskElt(8) / 8;
2412 assert(((
M0 |
M1) < 4) &&
"A mask element out of bounds?");
2416 if (
N->getOperand(1).isUndef()) {
2417 if ((
M0 |
M1) < 2) {
2418 DM = IsLE ? (((~M1) & 1) << 1) + ((~
M0) & 1) : (
M0 << 1) + (
M1 & 1);
2426 if (
M0 > 1 &&
M1 < 2) {
2428 }
else if (M0 < 2 && M1 > 1) {
2436 DM = (((~M1) & 1) << 1) + ((~
M0) & 1);
2439 if (M0 < 2 && M1 > 1) {
2441 }
else if (
M0 > 1 &&
M1 < 2) {
2449 DM = (
M0 << 1) + (
M1 & 1);
2469 return (16 / EltSize) - 1 - (SVOp->
getMaskElt(0) / EltSize);
2485 unsigned EltSize = 16/
N->getNumOperands();
2486 if (EltSize < ByteSize) {
2487 unsigned Multiple = ByteSize/EltSize;
2489 assert(Multiple > 1 && Multiple <= 4 &&
"How can this happen?");
2492 for (
unsigned i = 0, e =
N->getNumOperands(); i != e; ++i) {
2493 if (
N->getOperand(i).isUndef())
continue;
2495 if (!isa<ConstantSDNode>(
N->getOperand(i)))
return SDValue();
2497 if (!UniquedVals[i&(Multiple-1)].getNode())
2498 UniquedVals[i&(Multiple-1)] =
N->getOperand(i);
2499 else if (UniquedVals[i&(Multiple-1)] !=
N->getOperand(i))
2509 bool LeadingZero =
true;
2510 bool LeadingOnes =
true;
2511 for (
unsigned i = 0; i != Multiple-1; ++i) {
2512 if (!UniquedVals[i].getNode())
continue;
2519 if (!UniquedVals[Multiple-1].getNode())
2521 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2526 if (!UniquedVals[Multiple-1].getNode())
2528 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2537 for (
unsigned i = 0, e =
N->getNumOperands(); i != e; ++i) {
2538 if (
N->getOperand(i).isUndef())
continue;
2540 OpVal =
N->getOperand(i);
2541 else if (OpVal !=
N->getOperand(i))
2547 unsigned ValSizeInBytes = EltSize;
2550 Value = CN->getZExtValue();
2552 assert(CN->getValueType(0) ==
MVT::f32 &&
"Only one legal FP vector type!");
2559 if (ValSizeInBytes < ByteSize)
return SDValue();
2570 if (MaskVal == 0)
return SDValue();
2573 if (SignExtend32<5>(MaskVal) == MaskVal)
2587 if (!isa<ConstantSDNode>(
N))
2590 Imm = (int16_t)cast<ConstantSDNode>(
N)->getZExtValue();
2592 return Imm == (int32_t)cast<ConstantSDNode>(
N)->getZExtValue();
2594 return Imm == (int64_t)cast<ConstantSDNode>(
N)->getZExtValue();
2612 return (~(LHSKnown.
Zero | RHSKnown.
Zero) == 0);
2621 if (
MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2622 if (Memop->getMemoryVT() ==
MVT::f64) {
2623 Base =
N.getOperand(0);
2636 if (!isa<ConstantSDNode>(
N))
2639 Imm = (int64_t)cast<ConstantSDNode>(
N)->getZExtValue();
2640 return isInt<34>(Imm);
2667 (!EncodingAlignment ||
isAligned(*EncodingAlignment, Imm)))
2672 Base =
N.getOperand(0);
2675 }
else if (
N.getOpcode() ==
ISD::OR) {
2677 (!EncodingAlignment ||
isAligned(*EncodingAlignment, Imm)))
2689 if (~(LHSKnown.
Zero | RHSKnown.
Zero) == 0) {
2690 Base =
N.getOperand(0);
2761 (!EncodingAlignment ||
isAligned(*EncodingAlignment, imm))) {
2767 Base =
N.getOperand(0);
2770 }
else if (
N.getOperand(1).getOpcode() ==
PPCISD::Lo) {
2772 assert(!cast<ConstantSDNode>(
N.getOperand(1).getOperand(1))->getZExtValue()
2773 &&
"Cannot handle constant offsets yet!");
2774 Disp =
N.getOperand(1).getOperand(0);
2779 Base =
N.getOperand(0);
2782 }
else if (
N.getOpcode() ==
ISD::OR) {
2785 (!EncodingAlignment ||
isAligned(*EncodingAlignment, imm))) {
2795 dyn_cast<FrameIndexSDNode>(
N.getOperand(0))) {
2799 Base =
N.getOperand(0);
2812 (!EncodingAlignment ||
isAligned(*EncodingAlignment, Imm))) {
2815 CN->getValueType(0));
2820 if ((CN->getValueType(0) ==
MVT::i32 ||
2821 (int64_t)CN->getZExtValue() == (
int)CN->getZExtValue()) &&
2822 (!EncodingAlignment ||
2823 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2824 int Addr = (int)CN->getZExtValue();
2865 Base =
N.getOperand(0);
2881 Base =
N.getOperand(0);
2914 !
N.getOperand(1).hasOneUse() || !
N.getOperand(0).hasOneUse())) {
2915 Base =
N.getOperand(0);
2928 Ty *PCRelCand = dyn_cast<Ty>(
N);
2940 if (isValidPCRelNode<ConstantPoolSDNode>(
N) ||
2941 isValidPCRelNode<GlobalAddressSDNode>(
N) ||
2942 isValidPCRelNode<JumpTableSDNode>(
N) ||
2943 isValidPCRelNode<BlockAddressSDNode>(
N))
2959 EVT MemVT = LD->getMemoryVT();
2966 if (!ST.hasP8Vector())
2971 if (!ST.hasP9Vector())
2984 if (UI.getUse().get().getResNo() == 0 &&
3006 Ptr = LD->getBasePtr();
3007 VT = LD->getMemoryVT();
3008 Alignment = LD->getAlign();
3009 }
else if (
StoreSDNode *ST = dyn_cast<StoreSDNode>(
N)) {
3010 Ptr = ST->getBasePtr();
3011 VT = ST->getMemoryVT();
3012 Alignment = ST->getAlign();
3035 if (isa<FrameIndexSDNode>(
Base) || isa<RegisterSDNode>(
Base))
3038 SDValue Val = cast<StoreSDNode>(
N)->getValue();
3056 if (Alignment <
Align(4))
3068 isa<ConstantSDNode>(
Offset))
3083 unsigned &HiOpFlags,
unsigned &LoOpFlags,
3125 const bool Is64Bit = Subtarget.
isPPC64();
3140 EVT PtrVT =
Op.getValueType();
3156 return getTOCEntry(DAG,
SDLoc(CP), GA);
3159 unsigned MOHiFlag, MOLoFlag;
3166 return getTOCEntry(DAG,
SDLoc(CP), GA);
3226 EVT PtrVT = Op.getValueType();
3244 return getTOCEntry(DAG,
SDLoc(JT), GA);
3247 unsigned MOHiFlag, MOLoFlag;
3254 return getTOCEntry(DAG,
SDLoc(GA), GA);
3264 EVT PtrVT =
Op.getValueType();
3283 return getTOCEntry(DAG,
SDLoc(BASDN), GA);
3292 unsigned MOHiFlag, MOLoFlag;
3303 return LowerGlobalTLSAddressAIX(Op, DAG);
3305 return LowerGlobalTLSAddressLinux(Op, DAG);