PPCISelLowering.cpp
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCCState.h"
18#include "PPCCallingConv.h"
19#include "PPCFrameLowering.h"
20#include "PPCInstrInfo.h"
22#include "PPCPerfectShuffle.h"
23#include "PPCRegisterInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallSet.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
84#include "llvm/Support/Debug.h"
86#include "llvm/Support/Format.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(false));
109
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(true), cl::Hidden);
132
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(true), cl::Hidden);
137
139 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
143 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
144 cl::desc("max depth when checking alias info in GatherAllAliases()"));
145
147 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
148 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
149 "function to use initial-exec"));
150
151STATISTIC(NumTailCalls, "Number of tail calls");
152STATISTIC(NumSiblingCalls, "Number of sibling calls");
153STATISTIC(ShufflesHandledWithVPERM,
154 "Number of shuffles lowered to a VPERM or XXPERM");
155STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
156
157static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
158
159static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
160
161static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
162
163// A faster local-[exec|dynamic] TLS access sequence (enabled with the
164// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
165// variables; consistent with the IBM XL compiler, we apply a max size of
166// slightly under 32KB.
168
169// FIXME: Remove this once the bug has been fixed!
170extern cl::opt<bool> ANDIGlueBug;
171
172PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
173 const PPCSubtarget &STI)
174 : TargetLowering(TM), Subtarget(STI) {
175 // Initialize map that relates the PPC addressing modes to the computed flags
176 // of a load/store instruction. The map is used to determine the optimal
177 // addressing mode when selecting load and stores.
178 initializeAddrModeMap();
179 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
180 // arguments are at least 4/8 bytes aligned.
181 bool isPPC64 = Subtarget.isPPC64();
182 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
183 const MVT RegVT = Subtarget.getScalarIntVT();
184
185 // Set up the register classes.
186 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
187 if (!useSoftFloat()) {
188 if (hasSPE()) {
189 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
190 // EFPU2 APU only supports f32
191 if (!Subtarget.hasEFPU2())
192 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
193 } else {
194 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
195 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
196 }
197 }
198
200
201 // On P10, the default lowering generates better code using the
202 // setbc instruction.
203 if (!Subtarget.hasP10Vector()) {
205 if (isPPC64)
207 }
208
209 // Match BITREVERSE to a customized fast code sequence in the .td file.
212
213 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
215
216 // Custom lower inline assembly to check for special registers.
219
220 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
221 for (MVT VT : MVT::integer_valuetypes()) {
224 }
225
226 if (Subtarget.isISA3_0()) {
227 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
228 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
229 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
230 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
231 } else {
232 // No extending loads from f16 or HW conversions back and forth.
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
236 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
239 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
240 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
241 }
242
243 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
244
245 // PowerPC has pre-inc loads and stores.
256 if (!Subtarget.hasSPE()) {
261 }
262
263 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
264 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
265 for (MVT VT : ScalarIntVTs) {
270 }
271
272 if (Subtarget.useCRBits()) {
274
275 if (isPPC64 || Subtarget.hasFPCVT()) {
280
282 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
284 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
285
290
292 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
294 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
295 } else {
300 }
301
302 // PowerPC does not support direct load/store of condition registers.
305
306 // FIXME: Remove this once the ANDI glue bug is fixed:
307 if (ANDIGlueBug)
309
310 for (MVT VT : MVT::integer_valuetypes()) {
313 setTruncStoreAction(VT, MVT::i1, Expand);
314 }
315
316 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
317 }
318
319 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
320 // PPC (the libcall is not available).
325
326 // We do not currently implement these libm ops for PowerPC.
327 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
328 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
329 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
330 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
332 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
333
334 // PowerPC has no SREM/UREM instructions unless we are on P9
335 // On P9 we may use a hardware instruction to compute the remainder.
336 // When the result of both the remainder and the division is required it is
337 // more efficient to compute the remainder from the result of the division
338 // rather than use the remainder instruction. The instructions are legalized
339 // directly because the DivRemPairsPass performs the transformation at the IR
340 // level.
341 if (Subtarget.isISA3_0()) {
346 } else {
351 }
352
353 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
362
363 // Handle constrained floating-point operations of scalar.
364 // TODO: Handle SPE specific operation.
370
375
376 if (!Subtarget.hasSPE()) {
379 }
380
381 if (Subtarget.hasVSX()) {
384 }
385
386 if (Subtarget.hasFSQRT()) {
389 }
390
391 if (Subtarget.hasFPRND()) {
396
401 }
402
403 // We don't support sin/cos/sqrt/fmod/pow
414
415 // MASS transformation for LLVM intrinsics with replicating fast-math flag,
416 // to be consistent with the PPCGenScalarMASSEntries pass.
417 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
430 }
431
432 if (Subtarget.hasSPE()) {
435 } else {
436 setOperationAction(ISD::FMA , MVT::f64, Legal);
437 setOperationAction(ISD::FMA , MVT::f32, Legal);
440 }
441
442 if (Subtarget.hasSPE())
443 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
444
445 // If we're enabling GP optimizations, use hardware square root
446 if (!Subtarget.hasFSQRT() &&
447 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
448 Subtarget.hasFRE()))
450
451 if (!Subtarget.hasFSQRT() &&
452 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
453 Subtarget.hasFRES()))
455
456 if (Subtarget.hasFCPSGN()) {
459 } else {
462 }
463
464 if (Subtarget.hasFPRND()) {
469
474 }
475
476 // Prior to P10, PowerPC does not have BSWAP, but we can use the vector BSWAP
477 // instruction xxbrd to speed up scalar BSWAP64.
478 if (Subtarget.isISA3_1()) {
481 } else {
484 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
485 }
486
487 // CTPOP and CTTZ were introduced in P8 and P9, respectively.
488 if (Subtarget.isISA3_0()) {
489 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
490 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
491 } else {
492 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
493 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
494 }
495
496 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
499 } else {
502 }
503
504 // PowerPC does not have ROTR
507
508 if (!Subtarget.useCRBits()) {
509 // PowerPC does not have Select
514 }
515
516 // PowerPC wants to turn select_cc of FP into fsel when possible.
519
520 // PowerPC wants to optimize integer setcc a bit
521 if (!Subtarget.useCRBits())
523
524 if (Subtarget.hasFPU()) {
528
532 }
533
534 // PowerPC does not have BRCOND which requires SetCC
535 if (!Subtarget.useCRBits())
537
539
540 if (Subtarget.hasSPE()) {
541 // SPE has built-in conversions
548
549 // SPE supports signaling compare of f32/f64.
552 } else {
553 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
556
557 // PowerPC does not have [U|S]INT_TO_FP
562 }
563
564 if (Subtarget.hasDirectMove() && isPPC64) {
569 if (TM.Options.UnsafeFPMath) {
578 }
579 } else {
584 }
585
586 // We cannot sextinreg(i1). Expand to shifts.
588
589 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
590 // SjLj exception handling but a lightweight setjmp/longjmp replacement to
591 // support continuations, user-level threading, etc. As a result, no
592 // other SjLj exception interfaces are implemented; please don't build
593 // your own exception handling based on them.
594 // LLVM/Clang supports zero-cost DWARF exception handling.
597
598 // We want to legalize GlobalAddress and ConstantPool nodes into the
599 // appropriate instructions to materialize the address.
610
611 // TRAP is legal.
612 setOperationAction(ISD::TRAP, MVT::Other, Legal);
613
614 // TRAMPOLINE is custom lowered.
617
618 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
620
621 if (Subtarget.is64BitELFABI()) {
622 // VAARG always uses double-word chunks, so promote anything smaller.
624 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
626 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
628 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
630 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
632 } else if (Subtarget.is32BitELFABI()) {
633 // VAARG is custom lowered with the 32-bit SVR4 ABI.
636 } else
638
639 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
640 if (Subtarget.is32BitELFABI())
642 else
644
645 // Use the default implementation.
646 setOperationAction(ISD::VAEND , MVT::Other, Expand);
655
656 // We want to custom lower some of our intrinsics.
662
663 // To handle counter-based loop conditions.
665
670
671 // Comparisons that require checking two conditions.
672 if (Subtarget.hasSPE()) {
677 }
690
693
694 if (Subtarget.has64BitSupport()) {
695 // They also have instructions for converting between i64 and fp.
704 // This is just the low 32 bits of a (signed) fp->i64 conversion.
705 // We cannot do this with Promote because i64 is not a legal type.
708
709 if (Subtarget.hasLFIWAX() || isPPC64) {
712 }
713 } else {
714 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
715 if (Subtarget.hasSPE()) {
718 } else {
721 }
722 }
723
724 // With the instructions enabled under FPCVT, we can do everything.
725 if (Subtarget.hasFPCVT()) {
726 if (Subtarget.has64BitSupport()) {
735 }
736
745 }
746
747 if (Subtarget.use64BitRegs()) {
748 // 64-bit PowerPC implementations can support i64 types directly
749 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
750 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
752 // 64-bit PowerPC wants to expand i128 shifts itself.
756 } else {
757 // 32-bit PowerPC wants to expand i64 shifts itself.
761 }
762
763 // PowerPC has better expansions for funnel shifts than the generic
764 // TargetLowering::expandFunnelShift.
765 if (Subtarget.has64BitSupport()) {
768 }
771
772 if (Subtarget.hasVSX()) {
777 }
778
779 if (Subtarget.hasAltivec()) {
780 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
785 }
786 // First set operation action for all vector types to expand. Then we
787 // will selectively turn on ones that can be effectively codegen'd.
789 // add/sub are legal for all supported vector VT's.
792
793 // For v2i64, these are only valid with P8Vector. This is corrected after
794 // the loop.
795 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
800 }
801 else {
806 }
807
808 if (Subtarget.hasVSX()) {
811 }
812
813 // Vector instructions introduced in P8
814 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
817 }
818 else {
821 }
822
823 // Vector instructions introduced in P9
824 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
826 else
828
829 // We promote all shuffles to v16i8.
831 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
832
833 // We promote all non-typed operations to v4i32.
835 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
837 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
839 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
841 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
843 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
846 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
848 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
849
850 // No other operations are legal.
889
890 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
891 setTruncStoreAction(VT, InnerVT, Expand);
894 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
895 }
896 }
898 if (!Subtarget.hasP8Vector()) {
899 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
900 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
901 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
902 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
903 }
904
905 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
906 // with merges, splats, etc.
908
909 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
910 // are cheap, so handle them before they get expanded to scalars.
916
917 setOperationAction(ISD::AND , MVT::v4i32, Legal);
918 setOperationAction(ISD::OR , MVT::v4i32, Legal);
919 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
920 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
922 Subtarget.useCRBits() ? Legal : Expand);
923 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
933 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
936
937 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
938 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
939 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
940 if (Subtarget.hasAltivec())
941 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
943 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
944 if (Subtarget.hasP8Altivec())
945 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
946
947 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
948 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
949 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
950 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
951
952 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
953 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
954
955 if (Subtarget.hasVSX()) {
956 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
957 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
959 }
960
961 if (Subtarget.hasP8Altivec())
962 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
963 else
964 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
965
966 if (Subtarget.isISA3_1()) {
967 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
968 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
969 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
970 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
971 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
972 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
973 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
974 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
975 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
976 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
977 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
978 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
979 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
980 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
981 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
982 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
983 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
984 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
985 }
986
987 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
988 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
989
992 // LE is P8+/64-bit so direct moves are supported and these operations
993 // are legal. The custom transformation requires 64-bit since we need a
994 // pair of stores that will cover a 128-bit load for P10.
995 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
999 }
1000
1005
1006 // Altivec does not contain unordered floating-point compare instructions
1007 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1008 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1009 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1010 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1011
1012 if (Subtarget.hasVSX()) {
1015 if (Subtarget.hasP8Vector()) {
1018 }
1019 if (Subtarget.hasDirectMove() && isPPC64) {
1028 }
1030
1031 // The nearbyint variants are not allowed to raise the inexact exception
1032 // so we can only code-gen them with unsafe math.
1033 if (TM.Options.UnsafeFPMath) {
1036 }
1037
1038 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1039 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1040 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1042 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1043 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1046
1048 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1049 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1052
1053 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1054 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1055
1056 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1057 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1058
1059 // Share the Altivec comparison restrictions.
1060 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1061 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1062 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1063 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1064
1065 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1066 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1067
1069
1070 if (Subtarget.hasP8Vector())
1071 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1072
1073 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1074
1075 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1076 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1077 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1078
1079 if (Subtarget.hasP8Altivec()) {
1080 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1081 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1082 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1083
1084 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1085 // SRL, but not for SRA because of the instructions available:
1086 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1087 // doing.
1088 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1089 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1090 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1091
1092 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1093 }
1094 else {
1095 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1096 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1097 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1098
1099 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1100
1101 // VSX v2i64 only supports non-arithmetic operations.
1102 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1103 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1104 }
1105
1106 if (Subtarget.isISA3_1())
1107 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1108 else
1109 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1110
1111 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1112 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1114 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1115
1117
1126
1127 // Custom handling for partial vectors of integers converted to
1128 // floating point. We already have optimal handling for v2i32 through
1129 // the DAG combine, so those aren't necessary.
1146
1147 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1148 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1149 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1150 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1153
1156
1157 // Handle constrained floating-point operations on vectors.
1158 // The predicate is `hasVSX` because Altivec instructions do not raise
1159 // exceptions but VSX vector instructions do.
1173
1187
1188 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1189 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1190
1191 for (MVT FPT : MVT::fp_valuetypes())
1192 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1193
1194 // Expand the SELECT to SELECT_CC
1196
1197 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1198 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1199
1200 // No implementation for these ops for PowerPC.
1202 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1203 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1204 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1206 setOperationAction(ISD::FREM, MVT::f128, Expand);
1207 }
1208
1209 if (Subtarget.hasP8Altivec()) {
1210 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1211 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1212 }
1213
1214 if (Subtarget.hasP9Vector()) {
1217
1218 // Test data class instructions store results in CR bits.
1219 if (Subtarget.useCRBits()) {
1224 }
1225
1226 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1227 // SRL, but not for SRA because of the instructions available:
1228 // VS{RL} and VS{RL}O.
1229 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1230 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1231 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1232
1233 setOperationAction(ISD::FADD, MVT::f128, Legal);
1234 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1235 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1236 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1238
1239 setOperationAction(ISD::FMA, MVT::f128, Legal);
1246
1248 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1250 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1253
1257
1258 // Handle constrained floating-point operations of fp128
1275 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1276 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1277 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1278 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1279 } else if (Subtarget.hasVSX()) {
1282
1283 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1284 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1285
1286 // Set FADD/FSUB as libcalls to keep the legalizer from expanding
1287 // fp_to_uint and int_to_fp.
1290
1291 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1292 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1293 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1294 setOperationAction(ISD::FABS, MVT::f128, Expand);
1296 setOperationAction(ISD::FMA, MVT::f128, Expand);
1298
1299 // Expand the fp_extend if the target type is fp128.
1302
1303 // Expand the fp_round if the source type is fp128.
1304 for (MVT VT : {MVT::f32, MVT::f64}) {
1307 }
1308
1313
1314 // Lower following f128 select_cc pattern:
1315 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1317
1318 // We need to handle f128 SELECT_CC with integer result type.
1320 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1321 }
1322
1323 if (Subtarget.hasP9Altivec()) {
1324 if (Subtarget.isISA3_1()) {
1329 } else {
1332 }
1340
1341 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1342 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1343 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1344 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1345 }
1346
1347 if (Subtarget.hasP10Vector()) {
1349 }
1350 }
1351
1352 if (Subtarget.pairedVectorMemops()) {
1353 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1354 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1355 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1356 }
1357 if (Subtarget.hasMMA()) {
1358 if (Subtarget.isISAFuture())
1359 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1360 else
1361 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1362 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1363 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1365 }
1366
1367 if (Subtarget.has64BitSupport())
1369
1370 if (Subtarget.isISA3_1())
1371 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1372
1373 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1374
1375 if (!isPPC64) {
1378 }
1379
1384 }
1385
1387
1388 if (Subtarget.hasAltivec()) {
1389 // Altivec instructions set fields to all zeros or all ones.
1391 }
1392
1395 else if (isPPC64)
1397 else
1399
1400 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1401
1402 // We have target-specific dag combine patterns for the following nodes:
1405 if (Subtarget.hasFPCVT())
1408 if (Subtarget.useCRBits())
1412
1414
1416
1417 if (Subtarget.useCRBits()) {
1419 }
1420
1421 setLibcallName(RTLIB::LOG_F128, "logf128");
1422 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1423 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1424 setLibcallName(RTLIB::EXP_F128, "expf128");
1425 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1426 setLibcallName(RTLIB::SIN_F128, "sinf128");
1427 setLibcallName(RTLIB::COS_F128, "cosf128");
1428 setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1429 setLibcallName(RTLIB::POW_F128, "powf128");
1430 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1431 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1432 setLibcallName(RTLIB::REM_F128, "fmodf128");
1433 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1434 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1435 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1436 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1437 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1438 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1439 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1440 setLibcallName(RTLIB::RINT_F128, "rintf128");
1441 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1442 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1443 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1444 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1445 setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1446
1447 if (Subtarget.isAIXABI()) {
1448 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1449 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1450 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1451 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1452 }
1453
1454 // With 32 condition bits, we don't need to sink (and duplicate) compares
1455 // aggressively in CodeGenPrep.
1456 if (Subtarget.useCRBits()) {
1459 }
1460
1461 // TODO: The default entry number is set to 64. This stops most jump table
1462 // generation on PPC. But it is good for current PPC HWs because the indirect
1463 // branch via mtctr to the jump table may lead to poor branch prediction.
1464 // Re-evaluate this value on future HWs that can do better with mtctr.
1466
1468
1469 switch (Subtarget.getCPUDirective()) {
1470 default: break;
1471 case PPC::DIR_970:
1472 case PPC::DIR_A2:
1473 case PPC::DIR_E500:
1474 case PPC::DIR_E500mc:
1475 case PPC::DIR_E5500:
1476 case PPC::DIR_PWR4:
1477 case PPC::DIR_PWR5:
1478 case PPC::DIR_PWR5X:
1479 case PPC::DIR_PWR6:
1480 case PPC::DIR_PWR6X:
1481 case PPC::DIR_PWR7:
1482 case PPC::DIR_PWR8:
1483 case PPC::DIR_PWR9:
1484 case PPC::DIR_PWR10:
1485 case PPC::DIR_PWR11:
1489 break;
1490 }
1491
1492 if (Subtarget.enableMachineScheduler())
1494 else
1496
1498
1499 // The Freescale cores do better with aggressive inlining of memcpy and
1500 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1501 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1502 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1503 MaxStoresPerMemset = 32;
1505 MaxStoresPerMemcpy = 32;
1509 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1510 // The A2 also benefits from (very) aggressive inlining of memcpy and
1511 // friends. The overhead of a function call, even when warm, can be
1512 // over one hundred cycles.
1513 MaxStoresPerMemset = 128;
1514 MaxStoresPerMemcpy = 128;
1515 MaxStoresPerMemmove = 128;
1516 MaxLoadsPerMemcmp = 128;
1517 } else {
1520 }
1521
1522 IsStrictFPEnabled = true;
1523
1524 // Let the subtarget (CPU) decide if a predictable select is more expensive
1525 // than the corresponding branch. This information is used in CGP to decide
1526 // when to convert selects into branches.
1528
1530}
1531
1532// *********************************** NOTE ************************************
1533// For selecting load and store instructions, the addressing modes are defined
1534// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1535// patterns to match the load and store instructions.
1536//
1537// The TD definitions for the addressing modes correspond to their respective
1538// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1539// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1540// address mode flags of a particular node. Afterwards, the computed address
1541// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1542// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1543// accordingly, based on the preferred addressing mode.
1544//
1545// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1546// MemOpFlags contains all the possible flags that can be used to compute the
1547// optimal addressing mode for load and store instructions.
1548// AddrMode contains all the possible load and store addressing modes available
1549// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1550//
1551// When adding new load and store instructions, it is possible that new address
1552// flags may need to be added into MemOpFlags, and a new addressing mode will
1553// need to be added to AddrMode. An entry of the new addressing mode (consisting
1554// of the minimal and main distinguishing address flags for the new load/store
1555// instructions) will need to be added into initializeAddrModeMap() below.
1556// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1557// need to be updated to account for selecting the optimal addressing mode.
1558// *****************************************************************************
1559/// Initialize the map that relates the different addressing modes of the load
1560/// and store instructions to a set of flags. This ensures the load/store
1561/// instruction is correctly matched during instruction selection.
1562void PPCTargetLowering::initializeAddrModeMap() {
1563 AddrModesMap[PPC::AM_DForm] = {
1564 // LWZ, STW
1569 // LBZ, LHZ, STB, STH
1574 // LHA
1579 // LFS, LFD, STFS, STFD
1584 };
1585 AddrModesMap[PPC::AM_DSForm] = {
1586 // LWA
1590 // LD, STD
1594 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1598 };
1599 AddrModesMap[PPC::AM_DQForm] = {
1600 // LXV, STXV
1604 };
1605 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1607 // TODO: Add mapping for quadword load/store.
1608}
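// Illustrative sketch only (not upstream code): following the NOTE above, a
// hypothetical new addressing mode would get its own entry in this map, of
// roughly this shape:
//   AddrModesMap[PPC::AM_SomeNewForm] = {
//       PPC::MOF_RPlusSImm34 | /* plus the distinguishing type/subtarget flags */,
//   };
// AM_SomeNewForm is a placeholder name; the real flags live in the MemOpFlags
// enum in PPCISelLowering.h, and getAddrModeForFlags() must also be taught to
// recognize the new combination.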
1609
1610/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1611/// the desired ByVal argument alignment.
1612static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1613 if (MaxAlign == MaxMaxAlign)
1614 return;
1615 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1616 if (MaxMaxAlign >= 32 &&
1617 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1618 MaxAlign = Align(32);
1619 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1620 MaxAlign < 16)
1621 MaxAlign = Align(16);
1622 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1623 Align EltAlign;
1624 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1625 if (EltAlign > MaxAlign)
1626 MaxAlign = EltAlign;
1627 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1628 for (auto *EltTy : STy->elements()) {
1629 Align EltAlign;
1630 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1631 if (EltAlign > MaxAlign)
1632 MaxAlign = EltAlign;
1633 if (MaxAlign == MaxMaxAlign)
1634 break;
1635 }
1636 }
1637}
1638
1639/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1640/// function arguments in the caller parameter area.
1641Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1642 const DataLayout &DL) const {
1643 // 16-byte and wider vectors are passed on a 16-byte boundary.
1644 // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1645 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1646 if (Subtarget.hasAltivec())
1647 getMaxByValAlign(Ty, Alignment, Align(16));
1648 return Alignment;
1649}
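// Worked example (illustrative): with Altivec enabled, a ByVal aggregate such
// as { i32, <4 x i32> } reaches the VectorType case of getMaxByValAlign()
// with a 128-bit element, so the returned alignment is raised to 16 bytes. An
// aggregate containing only scalars never hits that case and keeps the default
// 8-byte (PPC64) or 4-byte (PPC32) alignment computed above.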
1650
1651bool PPCTargetLowering::useSoftFloat() const {
1652 return Subtarget.useSoftFloat();
1653}
1654
1655bool PPCTargetLowering::hasSPE() const {
1656 return Subtarget.hasSPE();
1657}
1658
1659bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1660 return VT.isScalarInteger();
1661}
1662
1663bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
1664 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1665 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1666 return false;
1667
1668 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1669 if (VTy->getScalarType()->isIntegerTy()) {
1670 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1671 if (ElemSizeInBits == 32) {
1672 Index = Subtarget.isLittleEndian() ? 2 : 1;
1673 return true;
1674 }
1675 if (ElemSizeInBits == 64) {
1676 Index = Subtarget.isLittleEndian() ? 1 : 0;
1677 return true;
1678 }
1679 }
1680 }
1681 return false;
1682}
1683
1684const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1685 switch ((PPCISD::NodeType)Opcode) {
1686 case PPCISD::FIRST_NUMBER: break;
1687 case PPCISD::FSEL: return "PPCISD::FSEL";
1688 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1689 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1690 case PPCISD::FCFID: return "PPCISD::FCFID";
1691 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1692 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1693 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1694 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1695 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1696 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1697 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1698 case PPCISD::FRE: return "PPCISD::FRE";
1699 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1700 case PPCISD::FTSQRT:
1701 return "PPCISD::FTSQRT";
1702 case PPCISD::FSQRT:
1703 return "PPCISD::FSQRT";
1704 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1705 case PPCISD::VPERM: return "PPCISD::VPERM";
1706 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1707 case PPCISD::XXSPLTI_SP_TO_DP:
1708 return "PPCISD::XXSPLTI_SP_TO_DP";
1709 case PPCISD::XXSPLTI32DX:
1710 return "PPCISD::XXSPLTI32DX";
1711 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1712 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1713 case PPCISD::XXPERM:
1714 return "PPCISD::XXPERM";
1715 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1716 case PPCISD::CMPB: return "PPCISD::CMPB";
1717 case PPCISD::Hi: return "PPCISD::Hi";
1718 case PPCISD::Lo: return "PPCISD::Lo";
1719 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1720 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1721 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1722 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1723 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1724 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1725 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1726 case PPCISD::SRL: return "PPCISD::SRL";
1727 case PPCISD::SRA: return "PPCISD::SRA";
1728 case PPCISD::SHL: return "PPCISD::SHL";
1729 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1730 case PPCISD::CALL: return "PPCISD::CALL";
1731 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1732 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1733 case PPCISD::CALL_RM:
1734 return "PPCISD::CALL_RM";
1735 case PPCISD::CALL_NOP_RM:
1736 return "PPCISD::CALL_NOP_RM";
1737 case PPCISD::CALL_NOTOC_RM:
1738 return "PPCISD::CALL_NOTOC_RM";
1739 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1740 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1741 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1742 case PPCISD::BCTRL_RM:
1743 return "PPCISD::BCTRL_RM";
1744 case PPCISD::BCTRL_LOAD_TOC_RM:
1745 return "PPCISD::BCTRL_LOAD_TOC_RM";
1746 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1747 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1748 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1749 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1750 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1751 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1752 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1753 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1754 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1755 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1756 case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1757 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1758 case PPCISD::ANDI_rec_1_EQ_BIT:
1759 return "PPCISD::ANDI_rec_1_EQ_BIT";
1760 case PPCISD::ANDI_rec_1_GT_BIT:
1761 return "PPCISD::ANDI_rec_1_GT_BIT";
1762 case PPCISD::VCMP: return "PPCISD::VCMP";
1763 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1764 case PPCISD::LBRX: return "PPCISD::LBRX";
1765 case PPCISD::STBRX: return "PPCISD::STBRX";
1766 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1767 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1768 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1769 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1770 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1771 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1772 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1773 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1774 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1775 case PPCISD::ST_VSR_SCAL_INT:
1776 return "PPCISD::ST_VSR_SCAL_INT";
1777 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1778 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1779 case PPCISD::BDZ: return "PPCISD::BDZ";
1780 case PPCISD::MFFS: return "PPCISD::MFFS";
1781 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1782 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1783 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1784 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1785 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1786 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1787 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1788 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1789 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1790 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1791 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1792 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1793 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1794 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1795 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1796 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1797 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1798 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1799 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1800 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1801 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1802 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1803 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1804 case PPCISD::PADDI_DTPREL:
1805 return "PPCISD::PADDI_DTPREL";
1806 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1807 case PPCISD::SC: return "PPCISD::SC";
1808 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1809 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1810 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1811 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1812 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1813 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1814 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1815 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1816 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1817 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1818 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1819 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1820 case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1821 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1822 case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1823 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1824 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1825 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1826 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1827 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1828 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1829 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1830 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1831 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1832 case PPCISD::STRICT_FADDRTZ:
1833 return "PPCISD::STRICT_FADDRTZ";
1834 case PPCISD::STRICT_FCTIDZ:
1835 return "PPCISD::STRICT_FCTIDZ";
1836 case PPCISD::STRICT_FCTIWZ:
1837 return "PPCISD::STRICT_FCTIWZ";
1838 case PPCISD::STRICT_FCTIDUZ:
1839 return "PPCISD::STRICT_FCTIDUZ";
1840 case PPCISD::STRICT_FCTIWUZ:
1841 return "PPCISD::STRICT_FCTIWUZ";
1842 case PPCISD::STRICT_FCFID:
1843 return "PPCISD::STRICT_FCFID";
1844 case PPCISD::STRICT_FCFIDU:
1845 return "PPCISD::STRICT_FCFIDU";
1846 case PPCISD::STRICT_FCFIDS:
1847 return "PPCISD::STRICT_FCFIDS";
1848 case PPCISD::STRICT_FCFIDUS:
1849 return "PPCISD::STRICT_FCFIDUS";
1850 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1851 case PPCISD::STORE_COND:
1852 return "PPCISD::STORE_COND";
1853 case PPCISD::SETBC:
1854 return "PPCISD::SETBC";
1855 case PPCISD::SETBCR:
1856 return "PPCISD::SETBCR";
1857 }
1858 return nullptr;
1859}
1860
1861EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1862 EVT VT) const {
1863 if (!VT.isVector())
1864 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1865
1866 return VT.changeVectorElementTypeToInteger();
1867}
1868
1870 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1871 return true;
1872}
1873
1874//===----------------------------------------------------------------------===//
1875// Node matching predicates, for use by the tblgen matching code.
1876//===----------------------------------------------------------------------===//
1877
1878/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1879static bool isFloatingPointZero(SDValue Op) {
1880 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1881 return CFP->getValueAPF().isZero();
1882 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1883 // Maybe this has already been legalized into the constant pool?
1884 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1885 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1886 return CFP->getValueAPF().isZero();
1887 }
1888 return false;
1889}
1890
1891/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1892/// true if Op is undef or if it matches the specified value.
1893static bool isConstantOrUndef(int Op, int Val) {
1894 return Op < 0 || Op == Val;
1895}
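// For example, isConstantOrUndef(-1, 5) and isConstantOrUndef(5, 5) both
// return true (undef mask elements are encoded as negative values), while
// isConstantOrUndef(4, 5) returns false.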
1896
1897/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1898/// VPKUHUM instruction.
1899/// The ShuffleKind distinguishes between big-endian operations with
1900/// two different inputs (0), either-endian operations with two identical
1901/// inputs (1), and little-endian operations with two different inputs (2).
1902/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1903bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1904 SelectionDAG &DAG) {
1905 bool IsLE = DAG.getDataLayout().isLittleEndian();
1906 if (ShuffleKind == 0) {
1907 if (IsLE)
1908 return false;
1909 for (unsigned i = 0; i != 16; ++i)
1910 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1911 return false;
1912 } else if (ShuffleKind == 2) {
1913 if (!IsLE)
1914 return false;
1915 for (unsigned i = 0; i != 16; ++i)
1916 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1917 return false;
1918 } else if (ShuffleKind == 1) {
1919 unsigned j = IsLE ? 0 : 1;
1920 for (unsigned i = 0; i != 8; ++i)
1921 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1922 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1923 return false;
1924 }
1925 return true;
1926}
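// Example (illustrative): on a big-endian target with two distinct inputs
// (ShuffleKind == 0), the accepted mask selects the odd byte of every
// halfword across both inputs, i.e. mask[i] == i*2+1:
//   <1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>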
1927
1928/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1929/// VPKUWUM instruction.
1930/// The ShuffleKind distinguishes between big-endian operations with
1931/// two different inputs (0), either-endian operations with two identical
1932/// inputs (1), and little-endian operations with two different inputs (2).
1933/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1934bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1935 SelectionDAG &DAG) {
1936 bool IsLE = DAG.getDataLayout().isLittleEndian();
1937 if (ShuffleKind == 0) {
1938 if (IsLE)
1939 return false;
1940 for (unsigned i = 0; i != 16; i += 2)
1941 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1942 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1943 return false;
1944 } else if (ShuffleKind == 2) {
1945 if (!IsLE)
1946 return false;
1947 for (unsigned i = 0; i != 16; i += 2)
1948 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1949 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1950 return false;
1951 } else if (ShuffleKind == 1) {
1952 unsigned j = IsLE ? 0 : 2;
1953 for (unsigned i = 0; i != 8; i += 2)
1954 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1955 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1956 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1957 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1958 return false;
1959 }
1960 return true;
1961}
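// Example (illustrative): on a big-endian target with two distinct inputs
// (ShuffleKind == 0), the accepted mask keeps the low halfword of every word
// across both inputs:
//   <2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31>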
1962
1963/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1964/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1965/// current subtarget.
1966///
1967/// The ShuffleKind distinguishes between big-endian operations with
1968/// two different inputs (0), either-endian operations with two identical
1969/// inputs (1), and little-endian operations with two different inputs (2).
1970/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1971bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1972 SelectionDAG &DAG) {
1973 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1974 if (!Subtarget.hasP8Vector())
1975 return false;
1976
1977 bool IsLE = DAG.getDataLayout().isLittleEndian();
1978 if (ShuffleKind == 0) {
1979 if (IsLE)
1980 return false;
1981 for (unsigned i = 0; i != 16; i += 4)
1982 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1983 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1984 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1985 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1986 return false;
1987 } else if (ShuffleKind == 2) {
1988 if (!IsLE)
1989 return false;
1990 for (unsigned i = 0; i != 16; i += 4)
1991 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1992 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1993 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1994 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1995 return false;
1996 } else if (ShuffleKind == 1) {
1997 unsigned j = IsLE ? 0 : 4;
1998 for (unsigned i = 0; i != 8; i += 4)
1999 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
2000 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
2001 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
2002 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
2003 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
2004 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
2005 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
2006 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
2007 return false;
2008 }
2009 return true;
2010}
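// Example (illustrative): on a big-endian target with two distinct inputs
// (ShuffleKind == 0), the accepted mask keeps the low word of every
// doubleword across both inputs:
//   <4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31>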
2011
2012/// isVMerge - Common function, used to match vmrg* shuffles.
2013///
2014static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2015 unsigned LHSStart, unsigned RHSStart) {
2016 if (N->getValueType(0) != MVT::v16i8)
2017 return false;
2018 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2019 "Unsupported merge size!");
2020
2021 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2022 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2023 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2024 LHSStart+j+i*UnitSize) ||
2025 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2026 RHSStart+j+i*UnitSize))
2027 return false;
2028 }
2029 return true;
2030}
2031
2032/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2033/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2034/// The ShuffleKind distinguishes between big-endian merges with two
2035/// different inputs (0), either-endian merges with two identical inputs (1),
2036/// and little-endian merges with two different inputs (2). For the latter,
2037/// the input operands are swapped (see PPCInstrAltivec.td).
2038bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2039 unsigned ShuffleKind, SelectionDAG &DAG) {
2040 if (DAG.getDataLayout().isLittleEndian()) {
2041 if (ShuffleKind == 1) // unary
2042 return isVMerge(N, UnitSize, 0, 0);
2043 else if (ShuffleKind == 2) // swapped
2044 return isVMerge(N, UnitSize, 0, 16);
2045 else
2046 return false;
2047 } else {
2048 if (ShuffleKind == 1) // unary
2049 return isVMerge(N, UnitSize, 8, 8);
2050 else if (ShuffleKind == 0) // normal
2051 return isVMerge(N, UnitSize, 8, 24);
2052 else
2053 return false;
2054 }
2055}
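// Example (illustrative): a big-endian vmrglw with two distinct inputs
// (UnitSize == 4, ShuffleKind == 0) maps to isVMerge(N, 4, 8, 24), i.e. the
// byte mask
//   <8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31>
// which interleaves the low words of the two inputs.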
2056
2057/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2058/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2059/// The ShuffleKind distinguishes between big-endian merges with two
2060/// different inputs (0), either-endian merges with two identical inputs (1),
2061/// and little-endian merges with two different inputs (2). For the latter,
2062/// the input operands are swapped (see PPCInstrAltivec.td).
2063bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2064 unsigned ShuffleKind, SelectionDAG &DAG) {
2065 if (DAG.getDataLayout().isLittleEndian()) {
2066 if (ShuffleKind == 1) // unary
2067 return isVMerge(N, UnitSize, 8, 8);
2068 else if (ShuffleKind == 2) // swapped
2069 return isVMerge(N, UnitSize, 8, 24);
2070 else
2071 return false;
2072 } else {
2073 if (ShuffleKind == 1) // unary
2074 return isVMerge(N, UnitSize, 0, 0);
2075 else if (ShuffleKind == 0) // normal
2076 return isVMerge(N, UnitSize, 0, 16);
2077 else
2078 return false;
2079 }
2080}
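// Example (illustrative): a big-endian vmrghw with two distinct inputs
// (UnitSize == 4, ShuffleKind == 0) maps to isVMerge(N, 4, 0, 16), i.e. the
// byte mask
//   <0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23>
// which interleaves the high words of the two inputs.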
2081
2082/**
2083 * Common function used to match vmrgew and vmrgow shuffles
2084 *
2085 * The indexOffset determines whether to look for even or odd words in
2086 * the shuffle mask. This is based on the endianness of the target
2087 * machine.
2088 * - Little Endian:
2089 * - Use offset of 0 to check for odd elements
2090 * - Use offset of 4 to check for even elements
2091 * - Big Endian:
2092 * - Use offset of 0 to check for even elements
2093 * - Use offset of 4 to check for odd elements
2094 * A detailed description of the vector element ordering for little endian and
2095 * big endian can be found at
2096 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2097 * Targeting your applications - what little endian and big endian IBM XL C/C++
2098 * compiler differences mean to you
2099 *
2100 * The mask to the shuffle vector instruction specifies the indices of the
2101 * elements from the two input vectors to place in the result. The elements are
2102 * numbered in array-access order, starting with the first vector. These vectors
2103 * are always of type v16i8, thus each vector will contain 16 elements of
2104 * 8 bits each. More info on the shuffle vector can be found in the
2105 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2106 * Language Reference.
2107 *
2108 * The RHSStartValue indicates whether the same input vectors are used (unary)
2109 * or two different input vectors are used, based on the following:
2110 * - If the instruction uses the same vector for both inputs, the range of the
2111 * indices will be 0 to 15. In this case, the RHSStart value passed should
2112 * be 0.
2113 * - If the instruction has two different vectors then the range of the
2114 * indices will be 0 to 31. In this case, the RHSStart value passed should
2115 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2116 * to 31 specify elements in the second vector).
2117 *
2118 * \param[in] N The shuffle vector SD Node to analyze
2119 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2120 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2121 * vector to the shuffle_vector instruction
2122 * \return true iff this shuffle vector represents an even or odd word merge
2123 */
2124static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2125 unsigned RHSStartValue) {
2126 if (N->getValueType(0) != MVT::v16i8)
2127 return false;
2128
2129 for (unsigned i = 0; i < 2; ++i)
2130 for (unsigned j = 0; j < 4; ++j)
2131 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2132 i*RHSStartValue+j+IndexOffset) ||
2133 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2134 i*RHSStartValue+j+IndexOffset+8))
2135 return false;
2136 return true;
2137}
2138
2139/**
2140 * Determine if the specified shuffle mask is suitable for the vmrgew or
2141 * vmrgow instructions.
2142 *
2143 * \param[in] N The shuffle vector SD Node to analyze
2144 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2145 * \param[in] ShuffleKind Identify the type of merge:
2146 * - 0 = big-endian merge with two different inputs;
2147 * - 1 = either-endian merge with two identical inputs;
2148 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2149 * little-endian merges).
2150 * \param[in] DAG The current SelectionDAG
2151 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
2152 */
2153bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2154 unsigned ShuffleKind, SelectionDAG &DAG) {
2155 if (DAG.getDataLayout().isLittleEndian()) {
2156 unsigned indexOffset = CheckEven ? 4 : 0;
2157 if (ShuffleKind == 1) // Unary
2158 return isVMerge(N, indexOffset, 0);
2159 else if (ShuffleKind == 2) // swapped
2160 return isVMerge(N, indexOffset, 16);
2161 else
2162 return false;
2163 }
2164 else {
2165 unsigned indexOffset = CheckEven ? 0 : 4;
2166 if (ShuffleKind == 1) // Unary
2167 return isVMerge(N, indexOffset, 0);
2168 else if (ShuffleKind == 0) // Normal
2169 return isVMerge(N, indexOffset, 16);
2170 else
2171 return false;
2172 }
2173 return false;
2174}
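// Example (illustrative): a big-endian vmrgew with two distinct inputs
// (CheckEven == true, ShuffleKind == 0) uses indexOffset 0 and RHSStartValue
// 16, i.e. the byte mask
//   <0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27>
// which gathers the even-numbered words from both inputs.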
2175
2176/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2177/// amount, otherwise return -1.
2178/// The ShuffleKind distinguishes between big-endian operations with two
2179/// different inputs (0), either-endian operations with two identical inputs
2180/// (1), and little-endian operations with two different inputs (2). For the
2181/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2182int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2183 SelectionDAG &DAG) {
2184 if (N->getValueType(0) != MVT::v16i8)
2185 return -1;
2186
2187 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2188
2189 // Find the first non-undef value in the shuffle mask.
2190 unsigned i;
2191 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2192 /*search*/;
2193
2194 if (i == 16) return -1; // all undef.
2195
2196 // Otherwise, check to see if the rest of the elements are consecutively
2197 // numbered from this value.
2198 unsigned ShiftAmt = SVOp->getMaskElt(i);
2199 if (ShiftAmt < i) return -1;
2200
2201 ShiftAmt -= i;
2202 bool isLE = DAG.getDataLayout().isLittleEndian();
2203
2204 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2205 // Check the rest of the elements to see if they are consecutive.
2206 for (++i; i != 16; ++i)
2207 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2208 return -1;
2209 } else if (ShuffleKind == 1) {
2210 // Check the rest of the elements to see if they are consecutive.
2211 for (++i; i != 16; ++i)
2212 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2213 return -1;
2214 } else
2215 return -1;
2216
2217 if (isLE)
2218 ShiftAmt = 16 - ShiftAmt;
2219
2220 return ShiftAmt;
2221}
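// Worked example (illustrative): the mask {3,4,5,...,18} is a shift by 3
// bytes, so this returns 3 for a big-endian ShuffleKind-0 query; for the
// little-endian ShuffleKind-2 query the same mask reports 16 - 3 = 13,
// matching the swapped-operand lowering in PPCInstrAltivec.td.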
2222
2223/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2224/// specifies a splat of a single element that is suitable for input to
2225/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2226bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2227 EVT VT = N->getValueType(0);
2228 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2229 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2230
2231 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2232 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2233
2234 // The consecutive indices need to specify an element, not part of two
2235 // different elements. So abandon ship early if this isn't the case.
2236 if (N->getMaskElt(0) % EltSize != 0)
2237 return false;
2238
2239 // This is a splat operation if each element of the permute is the same, and
2240 // if the value doesn't reference the second vector.
2241 unsigned ElementBase = N->getMaskElt(0);
2242
2243 // FIXME: Handle UNDEF elements too!
2244 if (ElementBase >= 16)
2245 return false;
2246
2247 // Check that the indices are consecutive, in the case of a multi-byte element
2248 // splatted with a v16i8 mask.
2249 for (unsigned i = 1; i != EltSize; ++i)
2250 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2251 return false;
2252
2253 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2254 if (N->getMaskElt(i) < 0) continue;
2255 for (unsigned j = 0; j != EltSize; ++j)
2256 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2257 return false;
2258 }
2259 return true;
2260}
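// Worked example (illustrative): with EltSize = 4, the mask
//   {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}
// is accepted as a splat of word element 1; a mask starting at byte 5 would
// be rejected by the EltSize-multiple check above.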
2261
2262/// Check that the mask is shuffling Width-byte elements. Within each
2263/// Width-byte element of the mask, the indices may be in either increasing or
2264/// decreasing order as long as they are consecutive.
2265/// \param[in] N the shuffle vector SD Node to analyze
2266/// \param[in] Width the element width in bytes, which can be 2/4/8/16 (HalfWord/
2267/// Word/DoubleWord/QuadWord).
2268/// \param[in] StepLen the expected delta between adjacent indices within an
2269/// element: 1 if the indices are increasing, -1 if they are decreasing.
2270/// \return true iff the mask is shuffling Width-byte elements.
2271static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2272 int StepLen) {
2273 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2274 "Unexpected element width.");
2275 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2276
2277 unsigned NumOfElem = 16 / Width;
2278 unsigned MaskVal[16]; // Width is never greater than 16
2279 for (unsigned i = 0; i < NumOfElem; ++i) {
2280 MaskVal[0] = N->getMaskElt(i * Width);
2281 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2282 return false;
2283 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2284 return false;
2285 }
2286
2287 for (unsigned int j = 1; j < Width; ++j) {
2288 MaskVal[j] = N->getMaskElt(i * Width + j);
2289 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2290 return false;
2291 }
2292 }
2293 }
2294
2295 return true;
2296}
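// Worked example (illustrative): with Width = 4 and StepLen = 1 the mask
//   {8,9,10,11, 0,1,2,3, 24,25,26,27, 4,5,6,7}
// is accepted (each word starts at a multiple of 4 and counts upward), while
// StepLen = -1 instead accepts byte-reversed words such as {3,2,1,0, 7,6,5,4, ...}.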
2297
2298bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2299 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2300 if (!isNByteElemShuffleMask(N, 4, 1))
2301 return false;
2302
2303 // Now we look at mask elements 0,4,8,12
2304 unsigned M0 = N->getMaskElt(0) / 4;
2305 unsigned M1 = N->getMaskElt(4) / 4;
2306 unsigned M2 = N->getMaskElt(8) / 4;
2307 unsigned M3 = N->getMaskElt(12) / 4;
2308 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2309 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2310
2311 // Below, let H and L be arbitrary elements of the shuffle mask
2312 // where H is in the range [4,7] and L is in the range [0,3].
2313 // H, 1, 2, 3 or L, 5, 6, 7
2314 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2315 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2316 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2317 InsertAtByte = IsLE ? 12 : 0;
2318 Swap = M0 < 4;
2319 return true;
2320 }
2321 // 0, H, 2, 3 or 4, L, 6, 7
2322 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2323 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2324 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2325 InsertAtByte = IsLE ? 8 : 4;
2326 Swap = M1 < 4;
2327 return true;
2328 }
2329 // 0, 1, H, 3 or 4, 5, L, 7
2330 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2331 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2332 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2333 InsertAtByte = IsLE ? 4 : 8;
2334 Swap = M2 < 4;
2335 return true;
2336 }
2337 // 0, 1, 2, H or 4, 5, 6, L
2338 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2339 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2340 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2341 InsertAtByte = IsLE ? 0 : 12;
2342 Swap = M3 < 4;
2343 return true;
2344 }
2345
2346 // If both vector operands for the shuffle are the same vector, the mask will
2347 // contain only elements from the first one and the second one will be undef.
2348 if (N->getOperand(1).isUndef()) {
2349 ShiftElts = 0;
2350 Swap = true;
2351 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2352 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2353 InsertAtByte = IsLE ? 12 : 0;
2354 return true;
2355 }
2356 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2357 InsertAtByte = IsLE ? 8 : 4;
2358 return true;
2359 }
2360 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2361 InsertAtByte = IsLE ? 4 : 8;
2362 return true;
2363 }
2364 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2365 InsertAtByte = IsLE ? 0 : 12;
2366 return true;
2367 }
2368 }
2369
2370 return false;
2371}
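// Worked example (illustrative): the byte mask
//   {20,21,22,23, 4,5,6,7, 8,9,10,11, 12,13,14,15}
// has word indices {5,1,2,3}, matching the "H, 1, 2, 3" pattern above; on a
// little-endian target this yields ShiftElts = 1, InsertAtByte = 12 and
// Swap = false.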
2372
2373bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2374 bool &Swap, bool IsLE) {
2375 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2376 // Ensure each byte index of the word is consecutive.
2377 if (!isNByteElemShuffleMask(N, 4, 1))
2378 return false;
2379
2380 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2381 unsigned M0 = N->getMaskElt(0) / 4;
2382 unsigned M1 = N->getMaskElt(4) / 4;
2383 unsigned M2 = N->getMaskElt(8) / 4;
2384 unsigned M3 = N->getMaskElt(12) / 4;
2385
2386 // If both vector operands for the shuffle are the same vector, the mask will
2387 // contain only elements from the first one and the second one will be undef.
2388 if (N->getOperand(1).isUndef()) {
2389 assert(M0 < 4 && "Indexing into an undef vector?");
2390 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2391 return false;
2392
2393 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2394 Swap = false;
2395 return true;
2396 }
2397
2398 // Ensure each word index of the ShuffleVector Mask is consecutive.
2399 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2400 return false;
2401
2402 if (IsLE) {
2403 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2404 // Input vectors don't need to be swapped if the leading element
2405 // of the result is one of the 3 left elements of the second vector
2406 // (or if there is no shift to be done at all).
2407 Swap = false;
2408 ShiftElts = (8 - M0) % 8;
2409 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2410 // Input vectors need to be swapped if the leading element
2411 // of the result is one of the 3 left elements of the first vector
2412 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2413 Swap = true;
2414 ShiftElts = (4 - M0) % 4;
2415 }
2416
2417 return true;
2418 } else { // BE
2419 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2420 // Input vectors don't need to be swapped if the leading element
2421 // of the result is one of the 4 elements of the first vector.
2422 Swap = false;
2423 ShiftElts = M0;
2424 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2425 // Input vectors need to be swapped if the leading element
2426 // of the result is one of the 4 elements of the right vector.
2427 Swap = true;
2428 ShiftElts = M0 - 4;
2429 }
2430
2431 return true;
2432 }
2433}
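// Worked example (illustrative): the byte mask
//   {28,29,30,31, 0,1,2,3, 4,5,6,7, 8,9,10,11}
// has word indices {7,0,1,2}; on a little-endian target this selects
// Swap = false with ShiftElts = 1, and on big-endian Swap = true with
// ShiftElts = 3.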
2434
2435static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2436 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2437
2438 if (!isNByteElemShuffleMask(N, Width, -1))
2439 return false;
2440
2441 for (int i = 0; i < 16; i += Width)
2442 if (N->getMaskElt(i) != i + Width - 1)
2443 return false;
2444
2445 return true;
2446}
2447
2448bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2449 return isXXBRShuffleMaskHelper(N, 2);
2450}
2451
2452bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2453 return isXXBRShuffleMaskHelper(N, 4);
2454}
2455
2456bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2457 return isXXBRShuffleMaskHelper(N, 8);
2458}
2459
2460bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2461 return isXXBRShuffleMaskHelper(N, 16);
2462}
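// Worked example (illustrative): isXXBRWShuffleMask accepts
//   {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12},
// i.e. each word byte-reversed in place, which is what XXBRW produces.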
2463
2464/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2465/// if the inputs to the instruction should be swapped and set \p DM to the
2466/// value for the immediate.
2467/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2468/// AND element 0 of the result comes from the first input (LE) or second input
2469/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2470/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2471/// mask.
2472bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2473 bool &Swap, bool IsLE) {
2474 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2475
2476 // Ensure each byte index of the double word is consecutive.
2477 if (!isNByteElemShuffleMask(N, 8, 1))
2478 return false;
2479
2480 unsigned M0 = N->getMaskElt(0) / 8;
2481 unsigned M1 = N->getMaskElt(8) / 8;
2482 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2483
2484 // If both vector operands for the shuffle are the same vector, the mask will
2485 // contain only elements from the first one and the second one will be undef.
2486 if (N->getOperand(1).isUndef()) {
2487 if ((M0 | M1) < 2) {
2488 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2489 Swap = false;
2490 return true;
2491 } else
2492 return false;
2493 }
2494
2495 if (IsLE) {
2496 if (M0 > 1 && M1 < 2) {
2497 Swap = false;
2498 } else if (M0 < 2 && M1 > 1) {
2499 M0 = (M0 + 2) % 4;
2500 M1 = (M1 + 2) % 4;
2501 Swap = true;
2502 } else
2503 return false;
2504
2505 // Note: if control flow comes here that means Swap is already set above
2506 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2507 return true;
2508 } else { // BE
2509 if (M0 < 2 && M1 > 1) {
2510 Swap = false;
2511 } else if (M0 > 1 && M1 < 2) {
2512 M0 = (M0 + 2) % 4;
2513 M1 = (M1 + 2) % 4;
2514 Swap = true;
2515 } else
2516 return false;
2517
2518 // Note: if control flow comes here that means Swap is already set above
2519 DM = (M0 << 1) + (M1 & 1);
2520 return true;
2521 }
2522}
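// Worked example (illustrative): the byte mask {0..7, 24..31} selects
// doubleword 0 of the first input and doubleword 1 of the second
// (M0 = 0, M1 = 3); on a big-endian target this yields Swap = false and
// DM = 1.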
2523
2524
2525/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2526/// appropriate for PPC mnemonics (which have a big endian bias - namely
2527/// elements are counted from the left of the vector register).
2528unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2529 SelectionDAG &DAG) {
2530 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2531 assert(isSplatShuffleMask(SVOp, EltSize));
2532 EVT VT = SVOp->getValueType(0);
2533
2534 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2535 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2536 : SVOp->getMaskElt(0);
2537
2538 if (DAG.getDataLayout().isLittleEndian())
2539 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2540 else
2541 return SVOp->getMaskElt(0) / EltSize;
2542}
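// Worked example (illustrative): for a v16i8 mask splatting bytes
// {12,13,14,15} with EltSize = 4, the splat index is 12 / 4 = 3 on
// big-endian and (16 / 4) - 1 - 3 = 0 on little-endian, matching the
// left-to-right element numbering used by the splat mnemonics.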
2543
2544/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2545/// by using a vspltis[bhw] instruction of the specified element size, return
2546/// the constant being splatted. The ByteSize field indicates the number of
2547/// bytes of each element [124] -> [bhw].
2548SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2549 SDValue OpVal;
2550
2551 // If ByteSize of the splat is bigger than the element size of the
2552 // build_vector, then we have a case where we are checking for a splat where
2553 // multiple elements of the buildvector are folded together into a single
2554 * logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2555 unsigned EltSize = 16/N->getNumOperands();
2556 if (EltSize < ByteSize) {
2557 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2558 SDValue UniquedVals[4];
2559 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2560
2561 // See if all of the elements in the buildvector agree across each chunk.
2562 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2563 if (N->getOperand(i).isUndef()) continue;
2564 // If the element isn't a constant, bail fully out.
2565 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2566
2567 if (!UniquedVals[i&(Multiple-1)].getNode())
2568 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2569 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2570 return SDValue(); // no match.
2571 }
2572
2573 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2574 // either constant or undef values that are identical for each chunk. See
2575 // if these chunks can form into a larger vspltis*.
2576
2577 // Check to see if all of the leading entries are either 0 or -1. If
2578 // neither, then this won't fit into the immediate field.
2579 bool LeadingZero = true;
2580 bool LeadingOnes = true;
2581 for (unsigned i = 0; i != Multiple-1; ++i) {
2582 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2583
2584 LeadingZero &= isNullConstant(UniquedVals[i]);
2585 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2586 }
2587 // Finally, check the least significant entry.
2588 if (LeadingZero) {
2589 if (!UniquedVals[Multiple-1].getNode())
2590 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2591 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2592 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2593 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2594 }
2595 if (LeadingOnes) {
2596 if (!UniquedVals[Multiple-1].getNode())
2597 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2598 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2599 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2600 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2601 }
2602
2603 return SDValue();
2604 }
2605
2606 // Check to see if this buildvec has a single non-undef value in its elements.
2607 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2608 if (N->getOperand(i).isUndef()) continue;
2609 if (!OpVal.getNode())
2610 OpVal = N->getOperand(i);
2611 else if (OpVal != N->getOperand(i))
2612 return SDValue();
2613 }
2614
2615 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2616
2617 unsigned ValSizeInBytes = EltSize;
2618 uint64_t Value = 0;
2619 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2620 Value = CN->getZExtValue();
2621 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2622 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2623 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2624 }
2625
2626 // If the splat value is larger than the element value, then we can never do
2627 // this splat. The only case where the replicated bits could fit into our
2628 // immediate field would be zero, and we prefer to use vxor for that.
2629 if (ValSizeInBytes < ByteSize) return SDValue();
2630
2631 // If the element value is larger than the splat value, check if it consists
2632 // of a repeated bit pattern of size ByteSize.
2633 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2634 return SDValue();
2635
2636 // Properly sign extend the value.
2637 int MaskVal = SignExtend32(Value, ByteSize * 8);
2638
2639 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2640 if (MaskVal == 0) return SDValue();
2641
2642 // Finally, if this value fits in a 5 bit sext field, return it
2643 if (SignExtend32<5>(MaskVal) == MaskVal)
2644 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2645 return SDValue();
2646}
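// Worked example (illustrative): a v16i8 build_vector of sixteen constant 3s
// queried with ByteSize = 1 returns the target constant 3 (i.e. "vspltisb 3"),
// while a v8i16 build_vector of -2 values queried with ByteSize = 2 returns -2
// (i.e. "vspltish -2").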
2647
2648//===----------------------------------------------------------------------===//
2649// Addressing Mode Selection
2650//===----------------------------------------------------------------------===//
2651
2652/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2653/// or 64-bit immediate, and if the value can be accurately represented as a
2654/// sign extension from a 16-bit value. If so, this returns true and the
2655/// immediate.
2656bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2657 if (!isa<ConstantSDNode>(N))
2658 return false;
2659
2660 Imm = (int16_t)N->getAsZExtVal();
2661 if (N->getValueType(0) == MVT::i32)
2662 return Imm == (int32_t)N->getAsZExtVal();
2663 else
2664 return Imm == (int64_t)N->getAsZExtVal();
2665}
2666bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2667 return isIntS16Immediate(Op.getNode(), Imm);
2668}
2669
2670/// Used when computing address flags for selecting loads and stores.
2671/// If we have an OR, check if the LHS and RHS are provably disjoint.
2672/// An OR of two provably disjoint values is equivalent to an ADD.
2673/// Most PPC load/store instructions compute the effective address as a sum,
2674/// so doing this conversion is useful.
2675static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2676 if (N.getOpcode() != ISD::OR)
2677 return false;
2678 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2679 if (!LHSKnown.Zero.getBoolValue())
2680 return false;
2681 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2682 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2683}
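// Worked example (illustrative): for N = (or (shl X, 8), 255) the low 8 bits
// of the LHS are known zero and the upper bits of the RHS are known zero, so
// every bit is known zero on at least one side and the OR can be treated as
// an ADD when forming addresses.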
2684
2685/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2686/// be represented as an indexed [r+r] operation.
2687bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2688 SDValue &Index,
2689 SelectionDAG &DAG) const {
2690 for (SDNode *U : N->users()) {
2691 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2692 if (Memop->getMemoryVT() == MVT::f64) {
2693 Base = N.getOperand(0);
2694 Index = N.getOperand(1);
2695 return true;
2696 }
2697 }
2698 }
2699 return false;
2700}
2701
2702/// isIntS34Immediate - This method tests whether the value of the given node can be
2703/// accurately represented as a sign extension from a 34-bit value. If so,
2704/// this returns true and the immediate.
2705bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2706 if (!isa<ConstantSDNode>(N))
2707 return false;
2708
2709 Imm = (int64_t)cast<ConstantSDNode>(N)->getSExtValue();
2710 return isInt<34>(Imm);
2711}
2712bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2713 return isIntS34Immediate(Op.getNode(), Imm);
2714}
2715
2716/// SelectAddressRegReg - Given the specified address, check to see if it
2717/// can be represented as an indexed [r+r] operation. Returns false if it
2718/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2719/// non-zero and N can be represented by a base register plus a signed 16-bit
2720/// displacement, make a more precise judgement by checking (displacement % \p
2721/// EncodingAlignment).
2722bool PPCTargetLowering::SelectAddressRegReg(
2723 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2724 MaybeAlign EncodingAlignment) const {
2725 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2726 // a [pc+imm].
2727 if (SelectAddressPCRel(N, Base))
2728 return false;
2729
2730 int16_t Imm = 0;
2731 if (N.getOpcode() == ISD::ADD) {
2732 // Check for an SPE f64 load/store, which can't handle a 16-bit offset;
2733 // SPE loads/stores can only handle 8-bit offsets.
2734 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2735 return true;
2736 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2737 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2738 return false; // r+i
2739 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2740 return false; // r+i
2741
2742 Base = N.getOperand(0);
2743 Index = N.getOperand(1);
2744 return true;
2745 } else if (N.getOpcode() == ISD::OR) {
2746 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2747 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2748 return false; // r+i; fold into [r+imm] if we can.
2749
2750 // If this is an or of disjoint bitfields, we can codegen this as an add
2751 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2752 // disjoint.
2753 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2754
2755 if (LHSKnown.Zero.getBoolValue()) {
2756 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2757 // If all of the bits are known zero on the LHS or RHS, the add won't
2758 // carry.
2759 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2760 Base = N.getOperand(0);
2761 Index = N.getOperand(1);
2762 return true;
2763 }
2764 }
2765 }
2766
2767 return false;
2768}
2769
2770// If we happen to be doing an i64 load or store into a stack slot that has
2771// less than a 4-byte alignment, then the frame-index elimination may need to
2772// use an indexed load or store instruction (because the offset may not be a
2773// multiple of 4). The extra register needed to hold the offset comes from the
2774// register scavenger, and it is possible that the scavenger will need to use
2775// an emergency spill slot. As a result, we need to make sure that a spill slot
2776// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2777// stack slot.
2778static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2779 // FIXME: This does not handle the LWA case.
2780 if (VT != MVT::i64)
2781 return;
2782
2783 // NOTE: We'll exclude negative FIs here, which come from argument
2784 // lowering, because there are no known test cases triggering this problem
2785 // using packed structures (or similar). We can remove this exclusion if
2786 // we find such a test case. The reason why this is so test-case driven is
2787 // because this entire 'fixup' is only to prevent crashes (from the
2788 // register scavenger) on not-really-valid inputs. For example, if we have:
2789 // %a = alloca i1
2790 // %b = bitcast i1* %a to i64*
2791 // store i64 %c, i64* %b
2792 // then the store should really be marked as 'align 1', but is not. If it
2793 // were marked as 'align 1' then the indexed form would have been
2794 // instruction-selected initially, and the problem this 'fixup' is preventing
2795 // won't happen regardless.
2796 if (FrameIdx < 0)
2797 return;
2798
2799 MachineFunction &MF = DAG.getMachineFunction();
2800 MachineFrameInfo &MFI = MF.getFrameInfo();
2801
2802 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2803 return;
2804
2805 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2806 FuncInfo->setHasNonRISpills();
2807}
2808
2809/// Returns true if the address N can be represented by a base register plus
2810/// a signed 16-bit displacement [r+imm], and if it is not better
2811/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2812/// displacements that are multiples of that value.
2813bool PPCTargetLowering::SelectAddressRegImm(
2814 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2815 MaybeAlign EncodingAlignment) const {
2816 // FIXME dl should come from parent load or store, not from address
2817 SDLoc dl(N);
2818
2819 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2820 // a [pc+imm].
2821 if (SelectAddressPCRel(N, Base))
2822 return false;
2823
2824 // If this can be more profitably realized as r+r, fail.
2825 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2826 return false;
2827
2828 if (N.getOpcode() == ISD::ADD) {
2829 int16_t imm = 0;
2830 if (isIntS16Immediate(N.getOperand(1), imm) &&
2831 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2832 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2833 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2834 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2835 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2836 } else {
2837 Base = N.getOperand(0);
2838 }
2839 return true; // [r+i]
2840 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2841 // Match LOAD (ADD (X, Lo(G))).
2842 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2843 "Cannot handle constant offsets yet!");
2844 Disp = N.getOperand(1).getOperand(0); // The global address.
2845 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2846 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2847 Disp.getOpcode() == ISD::TargetConstantPool ||
2848 Disp.getOpcode() == ISD::TargetJumpTable);
2849 Base = N.getOperand(0);
2850 return true; // [&g+r]
2851 }
2852 } else if (N.getOpcode() == ISD::OR) {
2853 int16_t imm = 0;
2854 if (isIntS16Immediate(N.getOperand(1), imm) &&
2855 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2856 // If this is an or of disjoint bitfields, we can codegen this as an add
2857 // (for better address arithmetic) if the LHS and RHS of the OR are
2858 // provably disjoint.
2859 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2860
2861 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2862 // If all of the bits are known zero on the LHS or RHS, the add won't
2863 // carry.
2864 if (FrameIndexSDNode *FI =
2865 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2866 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2867 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2868 } else {
2869 Base = N.getOperand(0);
2870 }
2871 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2872 return true;
2873 }
2874 }
2875 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2876 // Loading from a constant address.
2877
2878 // If this address fits entirely in a 16-bit sext immediate field, codegen
2879 // this as "d, 0"
2880 int16_t Imm;
2881 if (isIntS16Immediate(CN, Imm) &&
2882 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2883 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2884 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2885 CN->getValueType(0));
2886 return true;
2887 }
2888
2889 // Handle 32-bit sext immediates with LIS + addr mode.
2890 if ((CN->getValueType(0) == MVT::i32 ||
2891 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2892 (!EncodingAlignment ||
2893 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2894 int Addr = (int)CN->getZExtValue();
2895
2896 // Otherwise, break this down into an LIS + disp.
2897 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2898
2899 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2900 MVT::i32);
2901 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2902 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2903 return true;
2904 }
2905 }
2906
2907 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2908 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2909 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2910 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2911 } else
2912 Base = N;
2913 return true; // [r+0]
2914}
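// Worked example (illustrative): for N = (add X, 20) with
// EncodingAlignment = Align(4), this returns Disp = 20 and Base = X, suitable
// for a D/DS-form access such as "ld r3, 20(rX)"; an offset that is not a
// multiple of 4 fails the alignment check and the indexed [r+r] form is used
// instead.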
2915
2916/// Similar to the 16-bit case but for instructions that take a 34-bit
2917/// displacement field (prefixed loads/stores).
2918bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2919 SDValue &Base,
2920 SelectionDAG &DAG) const {
2921 // Only on 64-bit targets.
2922 if (N.getValueType() != MVT::i64)
2923 return false;
2924
2925 SDLoc dl(N);
2926 int64_t Imm = 0;
2927
2928 if (N.getOpcode() == ISD::ADD) {
2929 if (!isIntS34Immediate(N.getOperand(1), Imm))
2930 return false;
2931 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2932 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2933 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2934 else
2935 Base = N.getOperand(0);
2936 return true;
2937 }
2938
2939 if (N.getOpcode() == ISD::OR) {
2940 if (!isIntS34Immediate(N.getOperand(1), Imm))
2941 return false;
2942 // If this is an or of disjoint bitfields, we can codegen this as an add
2943 // (for better address arithmetic) if the LHS and RHS of the OR are
2944 // provably disjoint.
2945 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2946 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2947 return false;
2948 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2949 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2950 else
2951 Base = N.getOperand(0);
2952 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2953 return true;
2954 }
2955
2956 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2957 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2958 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2959 return true;
2960 }
2961
2962 return false;
2963}
2964
2965/// SelectAddressRegRegOnly - Given the specified address, force it to be
2966/// represented as an indexed [r+r] operation.
2967bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2968 SDValue &Index,
2969 SelectionDAG &DAG) const {
2970 // Check to see if we can easily represent this as an [r+r] address. This
2971 // will fail if it thinks that the address is more profitably represented as
2972 // reg+imm, e.g. where imm = 0.
2973 if (SelectAddressRegReg(N, Base, Index, DAG))
2974 return true;
2975
2976 // If the address is the result of an add, we will utilize the fact that the
2977 // address calculation includes an implicit add. However, we can reduce
2978 // register pressure if we do not materialize a constant just for use as the
2979 // index register. We only get rid of the add if it is not an add of a
2980 // value and a 16-bit signed constant and both have a single use.
2981 int16_t imm = 0;
2982 if (N.getOpcode() == ISD::ADD &&
2983 (!isIntS16Immediate(N.getOperand(1), imm) ||
2984 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2985 Base = N.getOperand(0);
2986 Index = N.getOperand(1);
2987 return true;
2988 }
2989
2990 // Otherwise, do it the hard way, using R0 as the base register.
2991 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2992 N.getValueType());
2993 Index = N;
2994 return true;
2995}
2996
2997template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2998 Ty *PCRelCand = dyn_cast<Ty>(N);
2999 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
3000}
3001
3002/// Returns true if this address is a PC Relative address.
3003/// A PC Relative address is either marked with the flag PPCII::MO_PCREL_FLAG
3004/// or has the node opcode PPCISD::MAT_PCREL_ADDR.
3005bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
3006 // This is a materialize PC Relative node. Always select this as PC Relative.
3007 Base = N;
3008 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3009 return true;
3010 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3011 isValidPCRelNode<GlobalAddressSDNode>(N) ||
3012 isValidPCRelNode<JumpTableSDNode>(N) ||
3013 isValidPCRelNode<BlockAddressSDNode>(N))
3014 return true;
3015 return false;
3016}
3017
3018/// Returns true if we should use a direct load into vector instruction
3019/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3020static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3021
3022 // If there are any other uses other than scalar to vector, then we should
3023 // keep it as a scalar load -> direct move pattern to prevent multiple
3024 // loads.
3025 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3026 if (!LD)
3027 return false;
3028
3029 EVT MemVT = LD->getMemoryVT();
3030 if (!MemVT.isSimple())
3031 return false;
3032 switch(MemVT.getSimpleVT().SimpleTy) {
3033 case MVT::i64:
3034 break;
3035 case MVT::i32:
3036 if (!ST.hasP8Vector())
3037 return false;
3038 break;
3039 case MVT::i16:
3040 case MVT::i8:
3041 if (!ST.hasP9Vector())
3042 return false;
3043 break;
3044 default:
3045 return false;
3046 }
3047
3048 SDValue LoadedVal(N, 0);
3049 if (!LoadedVal.hasOneUse())
3050 return false;
3051
3052 for (SDUse &Use : LD->uses())
3053 if (Use.getResNo() == 0 &&
3054 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3055 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3056 return false;
3057
3058 return true;
3059}
3060
3061/// getPreIndexedAddressParts - returns true, and sets the base pointer,
3062/// offset pointer and addressing mode by reference, if the node's address
3063/// can be legally represented as a pre-indexed load / store address.
3064bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3065 SDValue &Offset,
3066 ISD::MemIndexedMode &AM,
3067 SelectionDAG &DAG) const {
3068 if (DisablePPCPreinc) return false;
3069
3070 bool isLoad = true;
3071 SDValue Ptr;
3072 EVT VT;
3073 Align Alignment;
3074 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3075 Ptr = LD->getBasePtr();
3076 VT = LD->getMemoryVT();
3077 Alignment = LD->getAlign();
3078 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3079 Ptr = ST->getBasePtr();
3080 VT = ST->getMemoryVT();
3081 Alignment = ST->getAlign();
3082 isLoad = false;
3083 } else
3084 return false;
3085
3086 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3087 // instructions because we can fold these into a more efficient instruction
3088 // instead (such as LXSD).
3089 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3090 return false;
3091 }
3092
3093 // PowerPC doesn't have preinc load/store instructions for vectors
3094 if (VT.isVector())
3095 return false;
3096
3097 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3098 // Common code will reject creating a pre-inc form if the base pointer
3099 // is a frame index, or if N is a store and the base pointer is either
3100 // the same as or a predecessor of the value being stored. Check for
3101 // those situations here, and try with swapped Base/Offset instead.
3102 bool Swap = false;
3103
3104 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3105 Swap = true;
3106 else if (!isLoad) {
3107 SDValue Val = cast<StoreSDNode>(N)->getValue();
3108 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3109 Swap = true;
3110 }
3111
3112 if (Swap)
3113 std::swap(Base, Offset);
3114
3115 AM = ISD::PRE_INC;
3116 return true;
3117 }
3118
3119 // LDU/STU can only handle immediates that are a multiple of 4.
3120 if (VT != MVT::i64) {
3121 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3122 return false;
3123 } else {
3124 // LDU/STU need an address with at least 4-byte alignment.
3125 if (Alignment < Align(4))
3126 return false;
3127
3128 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3129 return false;
3130 }
3131
3132 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3133 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3134 // sext i32 to i64 when addr mode is r+i.
3135 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3136 LD->getExtensionType() == ISD::SEXTLOAD &&
3137 isa<ConstantSDNode>(Offset))
3138 return false;
3139 }
3140
3141 AM = ISD::PRE_INC;
3142 return true;
3143}
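// Worked example (illustrative): a store whose address is (add r1, -16) can
// be converted by common code, using the Base/Offset/AM reported here, into a
// pre-increment form such as "stwu rS, -16(r1)" (or "stdu" for i64, where the
// displacement must be a multiple of 4).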
3144
3145//===----------------------------------------------------------------------===//
3146// LowerOperation implementation
3147//===----------------------------------------------------------------------===//
3148
3149/// Set the HiOpFlags and LoOpFlags to the target MO flags for label references,
3150/// using the PIC-base flag variants when generating position-independent code.
3151static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3152 unsigned &HiOpFlags, unsigned &LoOpFlags,
3153 const GlobalValue *GV = nullptr) {
3154 HiOpFlags = PPCII::MO_HA;
3155 LoOpFlags = PPCII::MO_LO;
3156
3157 // Don't use the pic base if not in PIC relocation model.
3158 if (IsPIC) {
3159 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3160 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3161 }
3162}
3163
3164static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3165 SelectionDAG &DAG) {
3166 SDLoc DL(HiPart);
3167 EVT PtrVT = HiPart.getValueType();
3168 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3169
3170 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3171 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3172
3173 // With PIC, the first instruction is actually "GR+hi(&G)".
3174 if (isPIC)
3175 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3176 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3177
3178 // Generate non-pic code that has direct accesses to the constant pool.
3179 // The address of the global is just (hi(&g)+lo(&g)).
3180 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3181}
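// Worked example (illustrative): for a non-PIC 32-bit access to a global g,
// the Hi/Lo pair above typically selects to a two-instruction sequence along
// the lines of:
//   lis r3, g@ha
//   addi r3, r3, g@l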
3182
3183static void setUsesTOCBasePtr(MachineFunction &MF) {
3184 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3185 FuncInfo->setUsesTOCBasePtr();
3186}
3187
3188static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3189 setUsesTOCBasePtr(DAG.getMachineFunction());
3190}
3191
3192SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3193 SDValue GA) const {
3194 EVT VT = Subtarget.getScalarIntVT();
3195 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3196 : Subtarget.isAIXABI()
3197 ? DAG.getRegister(PPC::R2, VT)
3198 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3199 SDValue Ops[] = { GA, Reg };
3200 return DAG.getMemIntrinsicNode(
3201 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3204}
3205
3206SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3207 SelectionDAG &DAG) const {
3208 EVT PtrVT = Op.getValueType();
3209 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3210 const Constant *C = CP->getConstVal();
3211
3212 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3213 // The actual address of the GlobalValue is stored in the TOC.
3214 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3215 if (Subtarget.isUsingPCRelativeCalls()) {
3216 SDLoc DL(CP);
3217 EVT Ty = getPointerTy(DAG.getDataLayout());
3218 SDValue ConstPool = DAG.getTargetConstantPool(
3219 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3220 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3221 }
3222 setUsesTOCBasePtr(DAG);
3223 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3224 return getTOCEntry(DAG, SDLoc(CP), GA);
3225 }
3226
3227 unsigned MOHiFlag, MOLoFlag;
3228 bool IsPIC = isPositionIndependent();
3229 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3230
3231 if (IsPIC && Subtarget.isSVR4ABI()) {
3232 SDValue GA =
3233 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3234 return getTOCEntry(DAG, SDLoc(CP), GA);
3235 }
3236
3237 SDValue CPIHi =
3238 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3239 SDValue CPILo =
3240 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3241 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3242}
3243
3244// For 64-bit PowerPC, prefer the more compact relative encodings.
3245// This trades 32 bits per jump table entry for one or two instructions
3246 // at the jump site.
3247unsigned PPCTargetLowering::getJumpTableEncoding() const {
3248 if (isJumpTableRelative())
3249 return MachineJumpTableInfo::EK_LabelDifference32;
3250
3251 return TargetLowering::getJumpTableEncoding();
3252}
3253
3254bool PPCTargetLowering::isJumpTableRelative() const {
3255 if (UseAbsoluteJumpTables)
3256 return false;
3257 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3258 return true;
3259 return TargetLowering::isJumpTableRelative();
3260}
3261
3262SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3263 SelectionDAG &DAG) const {
3264 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3265 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3266
3267 switch (getTargetMachine().getCodeModel()) {
3268 case CodeModel::Small:
3269 case CodeModel::Medium:
3270 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3271 default:
3272 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3273 getPointerTy(DAG.getDataLayout()));
3274 }
3275}
3276
3277const MCExpr *
3278PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3279 unsigned JTI,
3280 MCContext &Ctx) const {
3281 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3282 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3283
3284 switch (getTargetMachine().getCodeModel()) {
3285 case CodeModel::Small:
3286 case CodeModel::Medium:
3287 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3288 default:
3289 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3290 }
3291}
3292
3293SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3294 EVT PtrVT = Op.getValueType();
3295 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3296
3297 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3298 if (Subtarget.isUsingPCRelativeCalls()) {
3299 SDLoc DL(JT);
3300 EVT Ty = getPointerTy(DAG.getDataLayout());
3301 SDValue GA =
3302 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3303 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3304 return MatAddr;
3305 }
3306
3307 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3308 // The actual address of the GlobalValue is stored in the TOC.
3309 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3310 setUsesTOCBasePtr(DAG);
3311 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3312 return getTOCEntry(DAG, SDLoc(JT), GA);
3313 }
3314
3315 unsigned MOHiFlag, MOLoFlag;
3316 bool IsPIC = isPositionIndependent();
3317 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3318
3319 if (IsPIC && Subtarget.isSVR4ABI()) {
3320 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3321 PPCII::MO_PIC_FLAG);
3322 return getTOCEntry(DAG, SDLoc(GA), GA);
3323 }
3324
3325 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3326 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3327 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3328}
3329
3330SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3331 SelectionDAG &DAG) const {
3332 EVT PtrVT = Op.getValueType();
3333 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3334 const BlockAddress *BA = BASDN->getBlockAddress();
3335
3336 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3337 if (Subtarget.isUsingPCRelativeCalls()) {
3338 SDLoc DL(BASDN);
3339 EVT Ty = getPointerTy(DAG.getDataLayout());
3340 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3341 PPCII::MO_PCREL_FLAG);
3342 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3343 return MatAddr;
3344 }
3345
3346 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3347 // The actual BlockAddress is stored in the TOC.
3348 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3349 setUsesTOCBasePtr(DAG);
3350 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3351 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3352 }
3353
3354 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3355 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3356 return getTOCEntry(
3357 DAG, SDLoc(BASDN),
3358 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3359
3360 unsigned MOHiFlag, MOLoFlag;
3361 bool IsPIC = isPositionIndependent();
3362 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3363 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3364 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3365 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3366}
3367
3368SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3369 SelectionDAG &DAG) const {
3370 if (Subtarget.isAIXABI())
3371 return LowerGlobalTLSAddressAIX(Op, DAG);
3372
3373 return LowerGlobalTLSAddressLinux(Op, DAG);
3374}
3375
3376/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3377/// and then apply the update.
3379 SelectionDAG &DAG,
3380 const TargetMachine &TM) {
3381 // Initialize TLS model opt setting lazily:
3382 // (1) Use initial-exec for single TLS var references within current function.
3383 // (2) Use local-dynamic for multiple TLS var references within current
3384 // function.
3385 PPCFunctionInfo *FuncInfo =
3387 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3389 // Iterate over all instructions within current function, collect all TLS
3390 // global variables (global variables taken as the first parameter to
3391 // Intrinsic::threadlocal_address).
3392 const Function &Func = DAG.getMachineFunction().getFunction();
3393 for (const BasicBlock &BB : Func)
3394 for (const Instruction &I : BB)
3395 if (I.getOpcode() == Instruction::Call)
3396 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3397 if (Function *CF = CI->getCalledFunction())
3398 if (CF->isDeclaration() &&
3399 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3400 if (const GlobalValue *GV =
3401 dyn_cast<GlobalValue>(I.getOperand(0))) {
3402 TLSModel::Model GVModel = TM.getTLSModel(GV);
3403 if (GVModel == TLSModel::LocalDynamic)
3404 TLSGV.insert(GV);
3405 }
3406
3407 unsigned TLSGVCnt = TLSGV.size();
3408 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3409 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3410 FuncInfo->setAIXFuncUseTLSIEForLD();
3412 }
3413
3414 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3415 LLVM_DEBUG(
3416 dbgs() << DAG.getMachineFunction().getName()
3417 << " function is using the TLS-IE model for TLS-LD access.\n");
3418 Model = TLSModel::InitialExec;
3419 }
3420}
3421
3422SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3423 SelectionDAG &DAG) const {
3424 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3425
3426 if (DAG.getTarget().useEmulatedTLS())
3427 report_fatal_error("Emulated TLS is not yet supported on AIX");
3428
3429 SDLoc dl(GA);
3430 const GlobalValue *GV = GA->getGlobal();
3431 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3432 bool Is64Bit = Subtarget.isPPC64();
3434
3435 // Apply update to the TLS model.
3436 if (Subtarget.hasAIXShLibTLSModelOpt())
3438
3439 // TLS variables are accessed through TOC entries.
3440 // To support this, set the DAG to use the TOC base pointer.
3441 setUsesTOCBasePtr(DAG);
3442
3443 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3444
3445 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3446 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3447 bool HasAIXSmallTLSGlobalAttr = false;
3448 SDValue VariableOffsetTGA =
3449 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3450 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3451 SDValue TLSReg;
3452
3453 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3454 if (GVar->hasAttribute("aix-small-tls"))
3455 HasAIXSmallTLSGlobalAttr = true;
3456
3457 if (Is64Bit) {
3458 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3459 // involves a load of the variable offset (from the TOC), followed by an
3460 // add of the loaded variable offset to R13 (the thread pointer).
3461 // This code sequence looks like:
3462 // ld reg1,var[TC](2)
3463 // add reg2, reg1, r13 // r13 contains the thread pointer
3464 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3465
3466 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3467 // global variable attribute, produce a faster access sequence for
3468 // local-exec TLS variables where the offset from the TLS base is encoded
3469 // as an immediate operand.
3470 //
3471 // We only utilize the faster local-exec access sequence when the TLS
3472 // variable has a size within the policy limit. We treat types that are
3473 // not sized or are empty as being over the policy size limit.
3474 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3475 IsTLSLocalExecModel) {
3476 Type *GVType = GV->getValueType();
3477 if (GVType->isSized() && !GVType->isEmptyTy() &&
3478 GV->getDataLayout().getTypeAllocSize(GVType) <=
3480 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3481 }
3482 } else {
3483 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3484 // involves loading the variable offset from the TOC, generating a call to
3485 // .__get_tpointer to get the thread pointer (which will be in R3), and
3486 // adding the two together:
3487 // lwz reg1,var[TC](2)
3488 // bla .__get_tpointer
3489 // add reg2, reg1, r3
3490 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3491
3492 // We do not implement the 32-bit version of the faster access sequence
3493 // for local-exec that is controlled by the -maix-small-local-exec-tls
3494 // option, or the "aix-small-tls" global variable attribute.
3495 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3496 report_fatal_error("The small-local-exec TLS access sequence is "
3497 "currently only supported on AIX (64-bit mode).");
3498 }
3499 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3500 }
3501
3502 if (Model == TLSModel::LocalDynamic) {
3503 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3504
3505 // We do not implement the 32-bit version of the faster access sequence
3506 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3507 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3508 report_fatal_error("The small-local-dynamic TLS access sequence is "
3509 "currently only supported on AIX (64-bit mode).");
3510
3511 // For local-dynamic on AIX, we need to generate one TOC entry for each
3512 // variable offset, and a single module-handle TOC entry for the entire
3513 // file.
3514
3515 SDValue VariableOffsetTGA =
3516 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3517 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3518
3520 GlobalVariable *TLSGV =
3521 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3522 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3524 assert(TLSGV && "Not able to create GV for _$TLSML.");
3525 SDValue ModuleHandleTGA =
3526 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3527 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3528 SDValue ModuleHandle =
3529 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3530
3531 // With the -maix-small-local-dynamic-tls option, produce a faster access
3532 // sequence for local-dynamic TLS variables where the offset from the
3533 // module-handle is encoded as an immediate operand.
3534 //
3535 // We only utilize the faster local-dynamic access sequence when the TLS
3536 // variable has a size within the policy limit. We treat types that are
3537 // not sized or are empty as being over the policy size limit.
3538 if (HasAIXSmallLocalDynamicTLS) {
3539 Type *GVType = GV->getValueType();
3540 if (GVType->isSized() && !GVType->isEmptyTy() &&
3541 GV->getDataLayout().getTypeAllocSize(GVType) <=
3543 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3544 ModuleHandle);
3545 }
3546
3547 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3548 }
3549
3550 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3551 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3552 // need to generate two TOC entries, one for the variable offset, one for the
3553 // region handle. The global address for the TOC entry of the region handle is
3554 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3555 // entry of the variable offset is created with MO_TLSGD_FLAG.
3556 SDValue VariableOffsetTGA =
3557 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3558 SDValue RegionHandleTGA =
3559 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3560 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3561 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3562 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3563 RegionHandle);
3564}
3565
3566SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3567 SelectionDAG &DAG) const {
3568 // FIXME: TLS addresses currently use medium model code sequences,
3569 // which is the most useful form. Eventually support for small and
3570 // large models could be added if users need it, at the cost of
3571 // additional complexity.
3572 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3573 if (DAG.getTarget().useEmulatedTLS())
3574 return LowerToTLSEmulatedModel(GA, DAG);
3575
3576 SDLoc dl(GA);
3577 const GlobalValue *GV = GA->getGlobal();
3578 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3579 bool is64bit = Subtarget.isPPC64();
3580 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3581 PICLevel::Level picLevel = M->getPICLevel();
3582
3584 TLSModel::Model Model = TM.getTLSModel(GV);
3585
3586 if (Model == TLSModel::LocalExec) {
3587 if (Subtarget.isUsingPCRelativeCalls()) {
3588 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3589 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3591 SDValue MatAddr =
3592 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3593 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3594 }
3595
3596 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3598 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3600 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3601 : DAG.getRegister(PPC::R2, MVT::i32);
3602
3603 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3604 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3605 }
3606
3607 if (Model == TLSModel::InitialExec) {
3608 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3610 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3611 SDValue TGATLS = DAG.getTargetGlobalAddress(
3612 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3613 SDValue TPOffset;
3614 if (IsPCRel) {
3615 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3616 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3618 } else {
3619 SDValue GOTPtr;
3620 if (is64bit) {
3621 setUsesTOCBasePtr(DAG);
3622 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3623 GOTPtr =
3624 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3625 } else {
3626 if (!TM.isPositionIndependent())
3627 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3628 else if (picLevel == PICLevel::SmallPIC)
3629 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3630 else
3631 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3632 }
3633 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3634 }
3635 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3636 }
3637
3638 if (Model == TLSModel::GeneralDynamic) {
3639 if (Subtarget.isUsingPCRelativeCalls()) {
3640 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3642 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3643 }
3644
3645 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3646 SDValue GOTPtr;
3647 if (is64bit) {
3648 setUsesTOCBasePtr(DAG);
3649 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3650 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3651 GOTReg, TGA);
3652 } else {
3653 if (picLevel == PICLevel::SmallPIC)
3654 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3655 else
3656 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3657 }
3658 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3659 GOTPtr, TGA, TGA);
3660 }
3661
3662 if (Model == TLSModel::LocalDynamic) {
3663 if (Subtarget.isUsingPCRelativeCalls()) {
3664 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3666 SDValue MatPCRel =
3667 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3668 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3669 }
3670
3671 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3672 SDValue GOTPtr;
3673 if (is64bit) {
3674 setUsesTOCBasePtr(DAG);
3675 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3676 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3677 GOTReg, TGA);
3678 } else {
3679 if (picLevel == PICLevel::SmallPIC)
3680 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3681 else
3682 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3683 }
3684 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3685 PtrVT, GOTPtr, TGA, TGA);
3686 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3687 PtrVT, TLSAddr, TGA);
3688 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3689 }
3690
3691 llvm_unreachable("Unknown TLS model!");
3692}
3693
3694SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3695 SelectionDAG &DAG) const {
3696 EVT PtrVT = Op.getValueType();
3697 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3698 SDLoc DL(GSDN);
3699 const GlobalValue *GV = GSDN->getGlobal();
3700
3701 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3702 // The actual address of the GlobalValue is stored in the TOC.
3703 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3704 if (Subtarget.isUsingPCRelativeCalls()) {
3705      EVT Ty = getPointerTy(DAG.getDataLayout());
3706      if (isAccessedAsGotIndirect(Op)) {
3707        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3708                                                PPCII::MO_GOT_PCREL_FLAG);
3709        SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3710        SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3711                                   MachinePointerInfo());
3712        return Load;
3713      } else {
3714        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3715                                                PPCII::MO_PCREL_FLAG);
3716        return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3717      }
3718 }
3719 setUsesTOCBasePtr(DAG);
3720 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3721 return getTOCEntry(DAG, DL, GA);
3722 }
3723
3724 unsigned MOHiFlag, MOLoFlag;
3725 bool IsPIC = isPositionIndependent();
3726 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3727
3728 if (IsPIC && Subtarget.isSVR4ABI()) {
3729 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3730 GSDN->getOffset(),
3731                                            PPCII::MO_PIC_FLAG);
3732    return getTOCEntry(DAG, DL, GA);
3733 }
3734
3735 SDValue GAHi =
3736 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3737 SDValue GALo =
3738 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3739
3740 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3741}
3742
3743SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3744 bool IsStrict = Op->isStrictFPOpcode();
3745  ISD::CondCode CC =
3746      cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3747 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3748 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3749 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3750 EVT LHSVT = LHS.getValueType();
3751 SDLoc dl(Op);
3752
3753 // Soften the setcc with libcall if it is fp128.
3754 if (LHSVT == MVT::f128) {
3755 assert(!Subtarget.hasP9Vector() &&
3756 "SETCC for f128 is already legal under Power9!");
3757 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3758 Op->getOpcode() == ISD::STRICT_FSETCCS);
3759 if (RHS.getNode())
3760 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3761 DAG.getCondCode(CC));
3762 if (IsStrict)
3763 return DAG.getMergeValues({LHS, Chain}, dl);
3764 return LHS;
3765 }
3766
3767 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3768
3769 if (Op.getValueType() == MVT::v2i64) {
3770 // When the operands themselves are v2i64 values, we need to do something
3771 // special because VSX has no underlying comparison operations for these.
3772 if (LHS.getValueType() == MVT::v2i64) {
3773 // Equality can be handled by casting to the legal type for Altivec
3774 // comparisons, everything else needs to be expanded.
3775 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3776 return SDValue();
3777 SDValue SetCC32 = DAG.getSetCC(
3778 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3779 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3780 int ShuffV[] = {1, 0, 3, 2};
3781 SDValue Shuff =
3782 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3783 return DAG.getBitcast(MVT::v2i64,
3784                            DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3785                                        dl, MVT::v4i32, Shuff, SetCC32));
3786 }
3787
3788 // We handle most of these in the usual way.
3789 return Op;
3790 }
3791
3792 // If we're comparing for equality to zero, expose the fact that this is
3793 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3794 // fold the new nodes.
3795 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3796 return V;
3797
3798 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3799 // Leave comparisons against 0 and -1 alone for now, since they're usually
3800 // optimized. FIXME: revisit this when we can custom lower all setcc
3801 // optimizations.
3802 if (C->isAllOnes() || C->isZero())
3803 return SDValue();
3804 }
3805
3806 // If we have an integer seteq/setne, turn it into a compare against zero
3807 // by xor'ing the rhs with the lhs, which is faster than setting a
3808 // condition register, reading it back out, and masking the correct bit. The
3809 // normal approach here uses sub to do this instead of xor. Using xor exposes
3810 // the result to other bit-twiddling opportunities.
3811 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3812 EVT VT = Op.getValueType();
3813 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3814 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3815 }
3816 return SDValue();
3817}
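// Illustrative sketch (not part of the upstream file): the integer
// seteq/setne rewrite above, expressed as a hypothetical C helper.
//   bool eq(unsigned a, unsigned b) {
//     return (a ^ b) == 0;   // equivalent to a == b; the xor result then
//   }                        // feeds the ctlz/srl expansion and other combines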
3818
3819SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3820 SDNode *Node = Op.getNode();
3821 EVT VT = Node->getValueType(0);
3822 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3823 SDValue InChain = Node->getOperand(0);
3824 SDValue VAListPtr = Node->getOperand(1);
3825 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3826 SDLoc dl(Node);
3827
3828 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3829
3830 // gpr_index
3831 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3832 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3833 InChain = GprIndex.getValue(1);
3834
3835 if (VT == MVT::i64) {
3836 // Check if GprIndex is even
3837 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3838 DAG.getConstant(1, dl, MVT::i32));
3839 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3840 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3841 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3842 DAG.getConstant(1, dl, MVT::i32));
3843 // Align GprIndex to be even if it isn't
3844 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3845 GprIndex);
3846 }
3847
3848 // fpr index is 1 byte after gpr
3849 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3850 DAG.getConstant(1, dl, MVT::i32));
3851
3852 // fpr
3853 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3854 FprPtr, MachinePointerInfo(SV), MVT::i8);
3855 InChain = FprIndex.getValue(1);
3856
3857 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3858 DAG.getConstant(8, dl, MVT::i32));
3859
3860 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3861 DAG.getConstant(4, dl, MVT::i32));
3862
3863 // areas
3864 SDValue OverflowArea =
3865 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3866 InChain = OverflowArea.getValue(1);
3867
3868 SDValue RegSaveArea =
3869 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3870 InChain = RegSaveArea.getValue(1);
3871
3872 // select overflow_area if index > 8
3873 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3874 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3875
3876 // adjustment constant gpr_index * 4/8
3877 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3878 VT.isInteger() ? GprIndex : FprIndex,
3879 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3880 MVT::i32));
3881
3882 // OurReg = RegSaveArea + RegConstant
3883 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3884 RegConstant);
3885
3886 // Floating types are 32 bytes into RegSaveArea
3887 if (VT.isFloatingPoint())
3888 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3889 DAG.getConstant(32, dl, MVT::i32));
3890
3891 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3892 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3893 VT.isInteger() ? GprIndex : FprIndex,
3894 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3895 MVT::i32));
3896
3897 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3898 VT.isInteger() ? VAListPtr : FprPtr,
3899 MachinePointerInfo(SV), MVT::i8);
3900
3901 // determine if we should load from reg_save_area or overflow_area
3902 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3903
3904 // increase overflow_area by 4/8 if gpr/fpr > 8
3905 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3906 DAG.getConstant(VT.isInteger() ? 4 : 8,
3907 dl, MVT::i32));
3908
3909 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3910 OverflowAreaPlusN);
3911
3912 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3913 MachinePointerInfo(), MVT::i32);
3914
3915 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3916}
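// Illustrative sketch (not part of the upstream file): the 32-bit SVR4
// va_arg decision implemented above, written as hypothetical C against the
// va_list layout documented in LowerVASTART further down.
//   if (gpr < 8) {                        // still fits in the register save area
//     val = *(int *)(reg_save_area + gpr * 4);
//     gpr += 1;                           // bumped by 2 (after aligning) for i64
//   } else {
//     val = *(int *)overflow_arg_area;    // spilled by the caller
//     overflow_arg_area += 4;             // 8 for floating-point values
//   }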
3917
3918SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3919 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3920
3921 // We have to copy the entire va_list struct:
3922  // 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes
3923 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3924 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3925 false, true, /*CI=*/nullptr, std::nullopt,
3926                        MachinePointerInfo(), MachinePointerInfo());
3927}
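// A quick check of the 12-byte figure above (sketch, not part of the file):
// the 32-bit SVR4 va_list is laid out as
//   char gpr;                 // offset 0
//   char fpr;                 // offset 1
//   <2 bytes of padding>      // offsets 2-3, to align the pointers
//   char *overflow_arg_area;  // offset 4
//   char *reg_save_area;      // offset 8
// so 1 + 1 + 2 + 4 + 4 = 12 bytes are copied.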
3928
3929SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3930 SelectionDAG &DAG) const {
3931 if (Subtarget.isAIXABI())
3932 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3933
3934 return Op.getOperand(0);
3935}
3936
3937SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3938  MachineFunction &MF = DAG.getMachineFunction();
3939  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3940
3941 assert((Op.getOpcode() == ISD::INLINEASM ||
3942 Op.getOpcode() == ISD::INLINEASM_BR) &&
3943 "Expecting Inline ASM node.");
3944
3945  // If an LR store is already known to be required then there is no point in
3946 // checking this ASM as well.
3947 if (MFI.isLRStoreRequired())
3948 return Op;
3949
3950 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3951 // type MVT::Glue. We want to ignore this last operand if that is the case.
3952 unsigned NumOps = Op.getNumOperands();
3953 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3954 --NumOps;
3955
3956 // Check all operands that may contain the LR.
3957 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3958 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3959 unsigned NumVals = Flags.getNumOperandRegisters();
3960 ++i; // Skip the ID value.
3961
3962 switch (Flags.getKind()) {
3963 default:
3964 llvm_unreachable("Bad flags!");
3965    case InlineAsm::Kind::RegUse:
3966    case InlineAsm::Kind::Imm:
3967    case InlineAsm::Kind::Mem:
3968      i += NumVals;
3969 break;
3970    case InlineAsm::Kind::Clobber:
3971    case InlineAsm::Kind::RegDef:
3972    case InlineAsm::Kind::RegDefEarlyClobber: {
3973      for (; NumVals; --NumVals, ++i) {
3974 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3975 if (Reg != PPC::LR && Reg != PPC::LR8)
3976 continue;
3977 MFI.setLRStoreRequired();
3978 return Op;
3979 }
3980 break;
3981 }
3982 }
3983 }
3984
3985 return Op;
3986}
3987
3988SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3989 SelectionDAG &DAG) const {
3990 if (Subtarget.isAIXABI())
3991 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3992
3993 SDValue Chain = Op.getOperand(0);
3994 SDValue Trmp = Op.getOperand(1); // trampoline
3995 SDValue FPtr = Op.getOperand(2); // nested function
3996 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3997 SDLoc dl(Op);
3998
3999 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4000 bool isPPC64 = (PtrVT == MVT::i64);
4001 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
4002
4003  TargetLowering::ArgListTy Args;
4004  TargetLowering::ArgListEntry Entry;
4005
4006 Entry.Ty = IntPtrTy;
4007 Entry.Node = Trmp; Args.push_back(Entry);
4008
4009 // TrampSize == (isPPC64 ? 48 : 40);
4010 Entry.Node =
4011 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT());
4012 Args.push_back(Entry);
4013
4014 Entry.Node = FPtr; Args.push_back(Entry);
4015 Entry.Node = Nest; Args.push_back(Entry);
4016
4017 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4018  TargetLowering::CallLoweringInfo CLI(DAG);
4019  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4020      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4021      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4022
4023 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4024 return CallResult.second;
4025}
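// Illustrative note (an assumption, not taken from this file): the runtime
// callee used above is expected to come from compiler-rt's
// trampoline_setup.c, with a signature along the lines of
//   void __trampoline_setup(uint32_t *trampOnStack, int trampSizeAllocated,
//                           const void *realFunc, void *localsPtr);
// which writes a small thunk into the stack-allocated trampoline so that
// calling it loads the 'nest' pointer before jumping to realFunc.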
4026
4027SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4028  MachineFunction &MF = DAG.getMachineFunction();
4029  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4030 EVT PtrVT = getPointerTy(MF.getDataLayout());
4031
4032 SDLoc dl(Op);
4033
4034 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4035 // vastart just stores the address of the VarArgsFrameIndex slot into the
4036 // memory location argument.
4037 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4038 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4039 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4040 MachinePointerInfo(SV));
4041 }
4042
4043 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4044 // We suppose the given va_list is already allocated.
4045 //
4046 // typedef struct {
4047 // char gpr; /* index into the array of 8 GPRs
4048 // * stored in the register save area
4049 // * gpr=0 corresponds to r3,
4050 // * gpr=1 to r4, etc.
4051 // */
4052 // char fpr; /* index into the array of 8 FPRs
4053 // * stored in the register save area
4054 // * fpr=0 corresponds to f1,
4055 // * fpr=1 to f2, etc.
4056 // */
4057 // char *overflow_arg_area;
4058 // /* location on stack that holds
4059 // * the next overflow argument
4060 // */
4061 // char *reg_save_area;
4062 // /* where r3:r10 and f1:f8 (if saved)
4063 // * are stored
4064 // */
4065 // } va_list[1];
4066
4067 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4068 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4069 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4070 PtrVT);
4071 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4072 PtrVT);
4073
4074 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4075 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4076
4077 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4078 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4079
4080 uint64_t FPROffset = 1;
4081 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4082
4083 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4084
4085 // Store first byte : number of int regs
4086 SDValue firstStore =
4087 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4088 MachinePointerInfo(SV), MVT::i8);
4089 uint64_t nextOffset = FPROffset;
4090 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4091 ConstFPROffset);
4092
4093 // Store second byte : number of float regs
4094 SDValue secondStore =
4095 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4096 MachinePointerInfo(SV, nextOffset), MVT::i8);
4097 nextOffset += StackOffset;
4098 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4099
4100 // Store second word : arguments given on stack
4101 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4102 MachinePointerInfo(SV, nextOffset));
4103 nextOffset += FrameOffset;
4104 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4105
4106 // Store third word : arguments given in registers
4107 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4108 MachinePointerInfo(SV, nextOffset));
4109}
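// Worked example (sketch, not part of the upstream file) of the stores
// emitted above for the 32-bit SVR4 va_list, using the offsets computed from
// FPROffset, StackOffset and FrameOffset:
//   byte 0: number of GPR args already used   (ArgGPR, i8 truncating store)
//   byte 1: number of FPR args already used   (ArgFPR, i8 truncating store)
//   byte 4: overflow_arg_area                 (StackOffsetFI)
//   byte 8: reg_save_area                     (FR)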
4110
4111/// FPR - The set of FP registers that should be allocated for arguments
4112/// on Darwin and AIX.
4113static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4114 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4115 PPC::F11, PPC::F12, PPC::F13};
4116
4117/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4118/// the stack.
4119static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4120 unsigned PtrByteSize) {
4121 unsigned ArgSize = ArgVT.getStoreSize();
4122 if (Flags.isByVal())
4123 ArgSize = Flags.getByValSize();
4124
4125 // Round up to multiples of the pointer size, except for array members,
4126 // which are always packed.
4127 if (!Flags.isInConsecutiveRegs())
4128 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4129
4130 return ArgSize;
4131}
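// Worked examples for the rounding above (sketch, not part of the file),
// assuming PtrByteSize == 8 on a 64-bit target:
//   i32 argument:            store size 4  -> rounded up to 8
//   byval of 13 bytes:       ByValSize 13  -> rounded up to 16
//   consecutive-regs member: kept at its packed size, no rounding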
4132
4133/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4134/// on the stack.
4135static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4136                                         ISD::ArgFlagsTy Flags,
4137 unsigned PtrByteSize) {
4138 Align Alignment(PtrByteSize);
4139
4140 // Altivec parameters are padded to a 16 byte boundary.
4141 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4142 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4143 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4144 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4145 Alignment = Align(16);
4146
4147 // ByVal parameters are aligned as requested.
4148 if (Flags.isByVal()) {
4149 auto BVAlign = Flags.getNonZeroByValAlign();
4150 if (BVAlign > PtrByteSize) {
4151 if (BVAlign.value() % PtrByteSize != 0)
4152        report_fatal_error(
4153            "ByVal alignment is not a multiple of the pointer size");
4154
4155 Alignment = BVAlign;
4156 }
4157 }
4158
4159 // Array members are always packed to their original alignment.
4160 if (Flags.isInConsecutiveRegs()) {
4161 // If the array member was split into multiple registers, the first
4162 // needs to be aligned to the size of the full type. (Except for
4163 // ppcf128, which is only aligned as its f64 components.)
4164 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4165 Alignment = Align(OrigVT.getStoreSize());
4166 else
4167 Alignment = Align(ArgVT.getStoreSize());
4168 }
4169
4170 return Alignment;
4171}
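// Worked examples for the alignment rules above (sketch, not part of the
// file), again assuming an 8-byte pointer:
//   f64 scalar                         -> Align(8)
//   v4i32 / f128 Altivec parameter     -> Align(16)
//   byval with requested alignment 32  -> Align(32) (must be a multiple of 8)
//   split array member with OrigVT v4i32 -> aligned to the full 16 bytes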
4172
4173/// CalculateStackSlotUsed - Return whether this argument will use its
4174/// stack slot (instead of being passed in registers). ArgOffset,
4175/// AvailableFPRs, and AvailableVRs must hold the current argument
4176/// position, and will be updated to account for this argument.
4177static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4178 unsigned PtrByteSize, unsigned LinkageSize,
4179 unsigned ParamAreaSize, unsigned &ArgOffset,
4180 unsigned &AvailableFPRs,
4181 unsigned &AvailableVRs) {
4182 bool UseMemory = false;
4183
4184 // Respect alignment of argument on the stack.
4185 Align Alignment =
4186 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4187 ArgOffset = alignTo(ArgOffset, Alignment);
4188 // If there's no space left in the argument save area, we must
4189 // use memory (this check also catches zero-sized arguments).
4190 if (ArgOffset >= LinkageSize + ParamAreaSize)
4191 UseMemory = true;
4192
4193 // Allocate argument on the stack.
4194 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4195 if (Flags.isInConsecutiveRegsLast())
4196 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4197 // If we overran the argument save area, we must use memory
4198 // (this check catches arguments passed partially in memory)
4199 if (ArgOffset > LinkageSize + ParamAreaSize)
4200 UseMemory = true;
4201
4202 // However, if the argument is actually passed in an FPR or a VR,
4203 // we don't use memory after all.
4204 if (!Flags.isByVal()) {
4205 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4206 if (AvailableFPRs > 0) {
4207 --AvailableFPRs;
4208 return false;
4209 }
4210 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4211 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4212 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4213 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4214 if (AvailableVRs > 0) {
4215 --AvailableVRs;
4216 return false;
4217 }
4218 }
4219
4220 return UseMemory;
4221}
4222
4223/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4224/// ensure minimum alignment required for target.
4225static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4226                                     unsigned NumBytes) {
4227 return alignTo(NumBytes, Lowering->getStackAlign());
4228}
4229
4230SDValue PPCTargetLowering::LowerFormalArguments(
4231 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4232 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4233 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4234 if (Subtarget.isAIXABI())
4235 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4236 InVals);
4237 if (Subtarget.is64BitELFABI())
4238 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4239 InVals);
4240 assert(Subtarget.is32BitELFABI());
4241 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4242 InVals);
4243}
4244
4245SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4246 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4247 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4248 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4249
4250 // 32-bit SVR4 ABI Stack Frame Layout:
4251 // +-----------------------------------+
4252 // +--> | Back chain |
4253 // | +-----------------------------------+
4254 // | | Floating-point register save area |
4255 // | +-----------------------------------+
4256 // | | General register save area |
4257 // | +-----------------------------------+
4258 // | | CR save word |
4259 // | +-----------------------------------+
4260 // | | VRSAVE save word |
4261 // | +-----------------------------------+
4262 // | | Alignment padding |
4263 // | +-----------------------------------+
4264 // | | Vector register save area |
4265 // | +-----------------------------------+
4266 // | | Local variable space |
4267 // | +-----------------------------------+
4268 // | | Parameter list area |
4269 // | +-----------------------------------+
4270 // | | LR save word |
4271 // | +-----------------------------------+
4272 // SP--> +--- | Back chain |
4273 // +-----------------------------------+
4274 //
4275 // Specifications:
4276 // System V Application Binary Interface PowerPC Processor Supplement
4277 // AltiVec Technology Programming Interface Manual
4278
4279  MachineFunction &MF = DAG.getMachineFunction();
4280  MachineFrameInfo &MFI = MF.getFrameInfo();
4281 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4282
4283 EVT PtrVT = getPointerTy(MF.getDataLayout());
4284 // Potential tail calls could cause overwriting of argument stack slots.
4285 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4286 (CallConv == CallingConv::Fast));
4287 const Align PtrAlign(4);
4288
4289 // Assign locations to all of the incoming arguments.
4290  SmallVector<CCValAssign, 16> ArgLocs;
4291  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4292 *DAG.getContext());
4293
4294 // Reserve space for the linkage area on the stack.
4295 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4296 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4297 if (useSoftFloat())
4298 CCInfo.PreAnalyzeFormalArguments(Ins);
4299
4300 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4301 CCInfo.clearWasPPCF128();
4302
4303 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4304 CCValAssign &VA = ArgLocs[i];
4305
4306 // Arguments stored in registers.
4307 if (VA.isRegLoc()) {
4308 const TargetRegisterClass *RC;
4309 EVT ValVT = VA.getValVT();
4310
4311 switch (ValVT.getSimpleVT().SimpleTy) {
4312 default:
4313 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4314 case MVT::i1:
4315 case MVT::i32:
4316 RC = &PPC::GPRCRegClass;
4317 break;
4318 case MVT::f32:
4319 if (Subtarget.hasP8Vector())
4320 RC = &PPC::VSSRCRegClass;
4321 else if (Subtarget.hasSPE())
4322 RC = &PPC::GPRCRegClass;
4323 else
4324 RC = &PPC::F4RCRegClass;
4325 break;
4326 case MVT::f64:
4327 if (Subtarget.hasVSX())
4328 RC = &PPC::VSFRCRegClass;
4329 else if (Subtarget.hasSPE())
4330 // SPE passes doubles in GPR pairs.
4331 RC = &PPC::GPRCRegClass;
4332 else
4333 RC = &PPC::F8RCRegClass;
4334 break;
4335 case MVT::v16i8:
4336 case MVT::v8i16:
4337 case MVT::v4i32:
4338 RC = &PPC::VRRCRegClass;
4339 break;
4340 case MVT::v4f32:
4341 RC = &PPC::VRRCRegClass;
4342 break;
4343 case MVT::v2f64:
4344 case MVT::v2i64:
4345 RC = &PPC::VRRCRegClass;
4346 break;
4347 }
4348
4349 SDValue ArgValue;
4350 // Transform the arguments stored in physical registers into
4351 // virtual ones.
4352 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4353 assert(i + 1 < e && "No second half of double precision argument");
4354 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4355 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4356 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4357 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4358 if (!Subtarget.isLittleEndian())
4359 std::swap (ArgValueLo, ArgValueHi);
4360 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4361 ArgValueHi);
4362 } else {
4363 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4364 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4365 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4366 if (ValVT == MVT::i1)
4367 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4368 }
4369
4370 InVals.push_back(ArgValue);
4371 } else {
4372 // Argument stored in memory.
4373 assert(VA.isMemLoc());
4374
4375 // Get the extended size of the argument type in stack
4376 unsigned ArgSize = VA.getLocVT().getStoreSize();
4377 // Get the actual size of the argument type
4378 unsigned ObjSize = VA.getValVT().getStoreSize();
4379 unsigned ArgOffset = VA.getLocMemOffset();
4380 // Stack objects in PPC32 are right justified.
4381 ArgOffset += ArgSize - ObjSize;
4382 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4383
4384 // Create load nodes to retrieve arguments from the stack.
4385 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4386 InVals.push_back(
4387 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4388 }
4389 }
4390
4391 // Assign locations to all of the incoming aggregate by value arguments.
4392 // Aggregates passed by value are stored in the local variable space of the
4393 // caller's stack frame, right above the parameter list area.
4394 SmallVector<CCValAssign, 16> ByValArgLocs;
4395 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4396 ByValArgLocs, *DAG.getContext());
4397
4398 // Reserve stack space for the allocations in CCInfo.
4399 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4400
4401 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4402
4403 // Area that is at least reserved in the caller of this function.
4404 unsigned MinReservedArea = CCByValInfo.getStackSize();
4405 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4406
4407 // Set the size that is at least reserved in caller of this function. Tail
4408 // call optimized function's reserved stack space needs to be aligned so that
4409 // taking the difference between two stack areas will result in an aligned
4410 // stack.
4411 MinReservedArea =
4412 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4413 FuncInfo->setMinReservedArea(MinReservedArea);
4414
4415  SmallVector<SDValue, 8> MemOps;
4416
4417 // If the function takes variable number of arguments, make a frame index for
4418 // the start of the first vararg value... for expansion of llvm.va_start.
4419 if (isVarArg) {
4420 static const MCPhysReg GPArgRegs[] = {
4421 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4422 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4423 };
4424 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4425
4426 static const MCPhysReg FPArgRegs[] = {
4427 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4428 PPC::F8
4429 };
4430 unsigned NumFPArgRegs = std::size(FPArgRegs);
4431
4432 if (useSoftFloat() || hasSPE())
4433 NumFPArgRegs = 0;
4434
4435 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4436 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4437
4438 // Make room for NumGPArgRegs and NumFPArgRegs.
4439 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4440 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4441
4442    FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4443 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4444
4445 FuncInfo->setVarArgsFrameIndex(
4446 MFI.CreateStackObject(Depth, Align(8), false));
4447 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4448
4449 // The fixed integer arguments of a variadic function are stored to the
4450 // VarArgsFrameIndex on the stack so that they may be loaded by
4451 // dereferencing the result of va_next.
4452 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4453 // Get an existing live-in vreg, or add a new one.
4454 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4455 if (!VReg)
4456 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4457
4458 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4459 SDValue Store =
4460 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4461 MemOps.push_back(Store);
4462 // Increment the address by four for the next argument to store
4463 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4464 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4465 }
4466
4467 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4468 // is set.
4469 // The double arguments are stored to the VarArgsFrameIndex
4470 // on the stack.
4471 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4472 // Get an existing live-in vreg, or add a new one.
4473 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4474 if (!VReg)
4475 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4476
4477 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4478 SDValue Store =
4479 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4480 MemOps.push_back(Store);
4481 // Increment the address by eight for the next argument to store
4482 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4483 PtrVT);
4484 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4485 }
4486 }
4487
4488 if (!MemOps.empty())
4489 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4490
4491 return Chain;
4492}
4493
4494// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4495// value to MVT::i64 and then truncate to the correct register size.
4496SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4497 EVT ObjectVT, SelectionDAG &DAG,
4498 SDValue ArgVal,
4499 const SDLoc &dl) const {
4500 if (Flags.isSExt())
4501 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4502 DAG.getValueType(ObjectVT));
4503 else if (Flags.isZExt())
4504 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4505 DAG.getValueType(ObjectVT));
4506
4507 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4508}
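// Illustrative sketch (not part of the upstream file): an i32 argument the
// caller sign-extended arrives occupying a full 64-bit GPR, so the helper
// above wraps it as
//   trunc(AssertSext(i64 %gpr, i32)) -> i32
// letting later combines know the upper 32 bits already match the sign bit.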
4509
4510SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4511 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4512 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4513 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4514 // TODO: add description of PPC stack frame format, or at least some docs.
4515 //
4516 bool isELFv2ABI = Subtarget.isELFv2ABI();
4517 bool isLittleEndian = Subtarget.isLittleEndian();
4518  MachineFunction &MF = DAG.getMachineFunction();
4519  MachineFrameInfo &MFI = MF.getFrameInfo();
4520 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4521
4522 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4523 "fastcc not supported on varargs functions");
4524
4525 EVT PtrVT = getPointerTy(MF.getDataLayout());
4526 // Potential tail calls could cause overwriting of argument stack slots.
4527 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4528 (CallConv == CallingConv::Fast));
4529 unsigned PtrByteSize = 8;
4530 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4531
4532 static const MCPhysReg GPR[] = {
4533 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4534 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4535 };
4536 static const MCPhysReg VR[] = {
4537 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4538 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4539 };
4540
4541 const unsigned Num_GPR_Regs = std::size(GPR);
4542 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4543 const unsigned Num_VR_Regs = std::size(VR);
4544
4545 // Do a first pass over the arguments to determine whether the ABI
4546 // guarantees that our caller has allocated the parameter save area
4547 // on its stack frame. In the ELFv1 ABI, this is always the case;
4548 // in the ELFv2 ABI, it is true if this is a vararg function or if
4549 // any parameter is located in a stack slot.
4550
4551 bool HasParameterArea = !isELFv2ABI || isVarArg;
4552 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4553 unsigned NumBytes = LinkageSize;
4554 unsigned AvailableFPRs = Num_FPR_Regs;
4555 unsigned AvailableVRs = Num_VR_Regs;
4556 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4557 if (Ins[i].Flags.isNest())
4558 continue;
4559
4560 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4561 PtrByteSize, LinkageSize, ParamAreaSize,
4562 NumBytes, AvailableFPRs, AvailableVRs))
4563 HasParameterArea = true;
4564 }
4565
4566 // Add DAG nodes to load the arguments or copy them out of registers. On
4567 // entry to a function on PPC, the arguments start after the linkage area,
4568 // although the first ones are often in registers.
4569
4570 unsigned ArgOffset = LinkageSize;
4571 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4572  SmallVector<SDValue, 8> MemOps;
4573  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4574  unsigned CurArgIdx = 0;
4575 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4576 SDValue ArgVal;
4577 bool needsLoad = false;
4578 EVT ObjectVT = Ins[ArgNo].VT;
4579 EVT OrigVT = Ins[ArgNo].ArgVT;
4580 unsigned ObjSize = ObjectVT.getStoreSize();
4581 unsigned ArgSize = ObjSize;
4582 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4583 if (Ins[ArgNo].isOrigArg()) {
4584 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4585 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4586 }
4587 // We re-align the argument offset for each argument, except when using the
4588 // fast calling convention, when we need to make sure we do that only when
4589 // we'll actually use a stack slot.
4590 unsigned CurArgOffset;
4591 Align Alignment;
4592 auto ComputeArgOffset = [&]() {
4593 /* Respect alignment of argument on the stack. */
4594 Alignment =
4595 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4596 ArgOffset = alignTo(ArgOffset, Alignment);
4597 CurArgOffset = ArgOffset;
4598 };
4599
4600 if (CallConv != CallingConv::Fast) {
4601 ComputeArgOffset();
4602
4603 /* Compute GPR index associated with argument offset. */
4604 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4605 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4606 }
4607
4608 // FIXME the codegen can be much improved in some cases.
4609 // We do not have to keep everything in memory.
4610 if (Flags.isByVal()) {
4611 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4612
4613 if (CallConv == CallingConv::Fast)
4614 ComputeArgOffset();
4615
4616 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4617 ObjSize = Flags.getByValSize();
4618 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4619 // Empty aggregate parameters do not take up registers. Examples:
4620 // struct { } a;
4621 // union { } b;
4622 // int c[0];
4623 // etc. However, we have to provide a place-holder in InVals, so
4624 // pretend we have an 8-byte item at the current address for that
4625 // purpose.
4626 if (!ObjSize) {
4627 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4628 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4629 InVals.push_back(FIN);
4630 continue;
4631 }
4632
4633 // Create a stack object covering all stack doublewords occupied
4634 // by the argument. If the argument is (fully or partially) on
4635 // the stack, or if the argument is fully in registers but the
4636 // caller has allocated the parameter save anyway, we can refer
4637 // directly to the caller's stack frame. Otherwise, create a
4638 // local copy in our own frame.
4639 int FI;
4640 if (HasParameterArea ||
4641 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4642 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4643 else
4644 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4645 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4646
4647 // Handle aggregates smaller than 8 bytes.
4648 if (ObjSize < PtrByteSize) {
4649 // The value of the object is its address, which differs from the
4650 // address of the enclosing doubleword on big-endian systems.
4651 SDValue Arg = FIN;
4652 if (!isLittleEndian) {
4653 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4654 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4655 }
4656 InVals.push_back(Arg);
4657
4658 if (GPR_idx != Num_GPR_Regs) {
4659 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4660 FuncInfo->addLiveInAttr(VReg, Flags);
4661 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4662 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4663 SDValue Store =
4664 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4665 MachinePointerInfo(&*FuncArg), ObjType);
4666 MemOps.push_back(Store);
4667 }
4668 // Whether we copied from a register or not, advance the offset
4669 // into the parameter save area by a full doubleword.
4670 ArgOffset += PtrByteSize;
4671 continue;
4672 }
4673
4674 // The value of the object is its address, which is the address of
4675 // its first stack doubleword.
4676 InVals.push_back(FIN);
4677
4678 // Store whatever pieces of the object are in registers to memory.
4679 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4680 if (GPR_idx == Num_GPR_Regs)
4681 break;
4682
4683 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4684 FuncInfo->addLiveInAttr(VReg, Flags);
4685 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4686 SDValue Addr = FIN;
4687 if (j) {
4688 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4689 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4690 }
4691 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4692 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4693 SDValue Store =
4694 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4695 MachinePointerInfo(&*FuncArg, j), ObjType);
4696 MemOps.push_back(Store);
4697 ++GPR_idx;
4698 }
4699 ArgOffset += ArgSize;
4700 continue;
4701 }
4702
4703 switch (ObjectVT.getSimpleVT().SimpleTy) {
4704 default: llvm_unreachable("Unhandled argument type!");
4705 case MVT::i1:
4706 case MVT::i32:
4707 case MVT::i64:
4708 if (Flags.isNest()) {
4709 // The 'nest' parameter, if any, is passed in R11.
4710 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4711 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4712
4713 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4714 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4715
4716 break;
4717 }
4718
4719 // These can be scalar arguments or elements of an integer array type
4720 // passed directly. Clang may use those instead of "byval" aggregate
4721 // types to avoid forcing arguments to memory unnecessarily.
4722 if (GPR_idx != Num_GPR_Regs) {
4723 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4724 FuncInfo->addLiveInAttr(VReg, Flags);
4725 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4726
4727 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4728 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4729 // value to MVT::i64 and then truncate to the correct register size.
4730 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4731 } else {
4732 if (CallConv == CallingConv::Fast)
4733 ComputeArgOffset();
4734
4735 needsLoad = true;
4736 ArgSize = PtrByteSize;
4737 }
4738 if (CallConv != CallingConv::Fast || needsLoad)
4739 ArgOffset += 8;
4740 break;
4741
4742 case MVT::f32:
4743 case MVT::f64:
4744 // These can be scalar arguments or elements of a float array type
4745 // passed directly. The latter are used to implement ELFv2 homogenous
4746 // float aggregates.
4747 if (FPR_idx != Num_FPR_Regs) {
4748 unsigned VReg;
4749
4750 if (ObjectVT == MVT::f32)
4751 VReg = MF.addLiveIn(FPR[FPR_idx],
4752 Subtarget.hasP8Vector()
4753 ? &PPC::VSSRCRegClass
4754 : &PPC::F4RCRegClass);
4755 else
4756 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4757 ? &PPC::VSFRCRegClass
4758 : &PPC::F8RCRegClass);
4759
4760 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4761 ++FPR_idx;
4762 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4763 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4764 // once we support fp <-> gpr moves.
4765
4766 // This can only ever happen in the presence of f32 array types,
4767 // since otherwise we never run out of FPRs before running out
4768 // of GPRs.
4769 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4770 FuncInfo->addLiveInAttr(VReg, Flags);
4771 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4772
4773 if (ObjectVT == MVT::f32) {
4774 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4775 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4776 DAG.getConstant(32, dl, MVT::i32));
4777 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4778 }
4779
4780 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4781 } else {
4782 if (CallConv == CallingConv::Fast)
4783 ComputeArgOffset();
4784
4785 needsLoad = true;
4786 }
4787
4788 // When passing an array of floats, the array occupies consecutive
4789 // space in the argument area; only round up to the next doubleword
4790 // at the end of the array. Otherwise, each float takes 8 bytes.
4791 if (CallConv != CallingConv::Fast || needsLoad) {
4792 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4793 ArgOffset += ArgSize;
4794 if (Flags.isInConsecutiveRegsLast())
4795 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4796 }
4797 break;
4798 case MVT::v4f32:
4799 case MVT::v4i32:
4800 case MVT::v8i16:
4801 case MVT::v16i8:
4802 case MVT::v2f64:
4803 case MVT::v2i64:
4804 case MVT::v1i128:
4805 case MVT::f128:
4806 // These can be scalar arguments or elements of a vector array type
4807 // passed directly. The latter are used to implement ELFv2 homogenous
4808 // vector aggregates.
4809 if (VR_idx != Num_VR_Regs) {
4810 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4811 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4812 ++VR_idx;
4813 } else {
4814 if (CallConv == CallingConv::Fast)
4815 ComputeArgOffset();
4816 needsLoad = true;
4817 }
4818 if (CallConv != CallingConv::Fast || needsLoad)
4819 ArgOffset += 16;
4820 break;
4821 }
4822
4823 // We need to load the argument to a virtual register if we determined
4824 // above that we ran out of physical registers of the appropriate type.
4825 if (needsLoad) {
4826 if (ObjSize < ArgSize && !isLittleEndian)
4827 CurArgOffset += ArgSize - ObjSize;
4828 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4829 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4830 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4831 }
4832
4833 InVals.push_back(ArgVal);
4834 }
4835
4836 // Area that is at least reserved in the caller of this function.
4837 unsigned MinReservedArea;
4838 if (HasParameterArea)
4839 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4840 else
4841 MinReservedArea = LinkageSize;
4842
4843 // Set the size that is at least reserved in caller of this function. Tail
4844 // call optimized functions' reserved stack space needs to be aligned so that
4845 // taking the difference between two stack areas will result in an aligned
4846 // stack.
4847 MinReservedArea =
4848 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4849 FuncInfo->setMinReservedArea(MinReservedArea);
4850
4851 // If the function takes variable number of arguments, make a frame index for
4852 // the start of the first vararg value... for expansion of llvm.va_start.
4853  // As the ELFv2 ABI spec puts it:
4854 // C programs that are intended to be *portable* across different compilers
4855 // and architectures must use the header file <stdarg.h> to deal with variable
4856 // argument lists.
4857 if (isVarArg && MFI.hasVAStart()) {
4858 int Depth = ArgOffset;
4859
4860 FuncInfo->setVarArgsFrameIndex(
4861 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4862 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4863
4864 // If this function is vararg, store any remaining integer argument regs
4865 // to their spots on the stack so that they may be loaded by dereferencing
4866 // the result of va_next.
4867 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4868 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4869 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4870 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4871 SDValue Store =
4872 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4873 MemOps.push_back(Store);
4874 // Increment the address by four for the next argument to store
4875 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4876 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4877 }
4878 }
4879
4880 if (!MemOps.empty())
4881 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4882
4883 return Chain;
4884}
4885
4886/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4887/// adjusted to accommodate the arguments for the tailcall.
4888static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4889 unsigned ParamSize) {
4890
4891 if (!isTailCall) return 0;
4892
4893  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4894 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4895 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4896 // Remember only if the new adjustment is bigger.
4897 if (SPDiff < FI->getTailCallSPDelta())
4898 FI->setTailCallSPDelta(SPDiff);
4899
4900 return SPDiff;
4901}
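// Worked example for the computation above (sketch, not part of the file):
// if the caller reserved 112 bytes for its own argument area and the tail
// callee only needs 96, SPDiff = 112 - 96 = 16 and the callee can reuse the
// caller's area; if the callee needs 144, SPDiff = -32 and the stack must be
// grown, with the smallest (most negative) delta remembered via
// setTailCallSPDelta().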
4902
4903static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4904
4905static bool callsShareTOCBase(const Function *Caller,
4906 const GlobalValue *CalleeGV,
4907 const TargetMachine &TM) {
4908 // It does not make sense to call callsShareTOCBase() with a caller that
4909 // is PC Relative since PC Relative callers do not have a TOC.
4910#ifndef NDEBUG
4911 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4912 assert(!STICaller->isUsingPCRelativeCalls() &&
4913 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4914#endif
4915
4916 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4917 // don't have enough information to determine if the caller and callee share
4918 // the same TOC base, so we have to pessimistically assume they don't for
4919 // correctness.
4920 if (!CalleeGV)
4921 return false;
4922
4923 // If the callee is preemptable, then the static linker will use a plt-stub
4924 // which saves the toc to the stack, and needs a nop after the call
4925 // instruction to convert to a toc-restore.
4926 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4927 return false;
4928
4929 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4930 // We may need a TOC restore in the situation where the caller requires a
4931 // valid TOC but the callee is PC Relative and does not.
4932 const Function *F = dyn_cast<Function>(CalleeGV);
4933 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4934
4935 // If we have an Alias we can try to get the function from there.
4936 if (Alias) {
4937 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4938 F = dyn_cast<Function>(GlobalObj);
4939 }
4940
4941 // If we still have no valid function pointer we do not have enough
4942 // information to determine if the callee uses PC Relative calls so we must
4943 // assume that it does.
4944 if (!F)
4945 return false;
4946
4947 // If the callee uses PC Relative we cannot guarantee that the callee won't
4948 // clobber the TOC of the caller and so we must assume that the two
4949 // functions do not share a TOC base.
4950 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4951 if (STICallee->isUsingPCRelativeCalls())
4952 return false;
4953
4954 // If the GV is not a strong definition then we need to assume it can be
4955 // replaced by another function at link time. The function that replaces
4956 // it may not share the same TOC as the caller since the callee may be
4957 // replaced by a PC Relative version of the same function.
4958 if (!CalleeGV->isStrongDefinitionForLinker())
4959 return false;
4960
4961 // The medium and large code models are expected to provide a sufficiently
4962 // large TOC to provide all data addressing needs of a module with a
4963 // single TOC.
4964 if (CodeModel::Medium == TM.getCodeModel() ||
4965 CodeModel::Large == TM.getCodeModel())
4966 return true;
4967
4968 // Any explicitly-specified sections and section prefixes must also match.
4969 // Also, if we're using -ffunction-sections, then each function is always in
4970 // a different section (the same is true for COMDAT functions).
4971 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4972 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4973 return false;
4974 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4975 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4976 return false;
4977 }
4978
4979 return true;
4980}
4981
4982static bool
4983needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4984                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
4985 assert(Subtarget.is64BitELFABI());
4986
4987 const unsigned PtrByteSize = 8;
4988 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4989
4990 static const MCPhysReg GPR[] = {
4991 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4992 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4993 };
4994 static const MCPhysReg VR[] = {
4995 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4996 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4997 };
4998
4999 const unsigned NumGPRs = std::size(GPR);
5000 const unsigned NumFPRs = 13;
5001 const unsigned NumVRs = std::size(VR);
5002 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5003
5004 unsigned NumBytes = LinkageSize;
5005 unsigned AvailableFPRs = NumFPRs;
5006 unsigned AvailableVRs = NumVRs;
5007
5008 for (const ISD::OutputArg& Param : Outs) {
5009 if (Param.Flags.isNest()) continue;
5010
5011 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5012 LinkageSize, ParamAreaSize, NumBytes,
5013 AvailableFPRs, AvailableVRs))
5014 return true;
5015 }
5016 return false;
5017}
5018
5019static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5020 if (CB.arg_size() != CallerFn->arg_size())
5021 return false;
5022
5023 auto CalleeArgIter = CB.arg_begin();
5024 auto CalleeArgEnd = CB.arg_end();
5025 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5026
5027 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5028 const Value* CalleeArg = *CalleeArgIter;
5029 const Value* CallerArg = &(*CallerArgIter);
5030 if (CalleeArg == CallerArg)
5031 continue;
5032
5033 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5034 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5035 // }
5036 // 1st argument of callee is undef and has the same type as caller.
5037 if (CalleeArg->getType() == CallerArg->getType() &&
5038 isa<UndefValue>(CalleeArg))
5039 continue;
5040
5041 return false;
5042 }
5043
5044 return true;
5045}
5046
5047// Returns true if TCO is possible between the callers and callees
5048// calling conventions.
5049static bool
5050areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5051                                    CallingConv::ID CalleeCC) {
5052 // Tail calls are possible with fastcc and ccc.
5053 auto isTailCallableCC = [] (CallingConv::ID CC){
5054 return CC == CallingConv::C || CC == CallingConv::Fast;
5055 };
5056 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5057 return false;
5058
5059 // We can safely tail call both fastcc and ccc callees from a c calling
5060 // convention caller. If the caller is fastcc, we may have less stack space
5061 // than a non-fastcc caller with the same signature so disable tail-calls in
5062 // that case.
5063 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5064}
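// Quick reference for the rule above (sketch, not part of the file):
//   caller ccc    -> callee ccc    : eligible
//   caller ccc    -> callee fastcc : eligible
//   caller fastcc -> callee fastcc : eligible (same convention)
//   caller fastcc -> callee ccc    : rejected; the fastcc caller may have
//                                    reserved less stack than a ccc caller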
5065
5066bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5067 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5068 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5069    const SmallVectorImpl<ISD::OutputArg> &Outs,
5070    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5071 bool isCalleeExternalSymbol) const {
5072 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5073
5074 if (DisableSCO && !TailCallOpt) return false;
5075
5076 // Variadic argument functions are not supported.
5077 if (isVarArg) return false;
5078
5079 // Check that the calling conventions are compatible for tco.
5080 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5081 return false;
5082
5083  // A caller that contains any byval parameter is not supported.
5084 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5085 return false;
5086
5087  // A callee that contains any byval parameter is not supported either.
5088 // Note: This is a quick work around, because in some cases, e.g.
5089 // caller's stack size > callee's stack size, we are still able to apply
5090 // sibling call optimization. For example, gcc is able to do SCO for caller1
5091 // in the following example, but not for caller2.
5092 // struct test {
5093 // long int a;
5094 // char ary[56];
5095 // } gTest;
5096 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5097 // b->a = v.a;
5098 // return 0;
5099 // }
5100 // void caller1(struct test a, struct test c, struct test *b) {
5101 // callee(gTest, b); }
5102 // void caller2(struct test *b) { callee(gTest, b); }
5103 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5104 return false;
5105
5106 // If callee and caller use different calling conventions, we cannot pass
5107 // parameters on stack since offsets for the parameter area may be different.
5108 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5109 return false;
5110
5111 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5112 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5113 // callee potentially have different TOC bases then we cannot tail call since
5114 // we need to restore the TOC pointer after the call.
5115 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5116 // We cannot guarantee this for indirect calls or calls to external functions.
5117 // When PC-Relative addressing is used, the concept of the TOC is no longer
5118 // applicable so this check is not required.
5119 // Check first for indirect calls.
5120 if (!Subtarget.isUsingPCRelativeCalls() &&
5121 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5122 return false;
5123
5124 // Check if we share the TOC base.
5125 if (!Subtarget.isUsingPCRelativeCalls() &&
5126 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5127 return false;
5128
5129 // TCO allows altering callee ABI, so we don't have to check further.
5130 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5131 return true;
5132
5133 if (DisableSCO) return false;
5134
5135  // If the callee uses the same argument list as the caller, then we can
5136  // apply SCO in this case. If it does not, then we need to check whether the
5137  // callee needs stack for passing arguments.
5138 // PC Relative tail calls may not have a CallBase.
5139 // If there is no CallBase we cannot verify if we have the same argument
5140 // list so assume that we don't have the same argument list.
5141 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5142 needStackSlotPassParameters(Subtarget, Outs))
5143 return false;
5144 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5145 return false;
5146
5147 return true;
5148}
5149
5150/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5151/// for tail call optimization. Targets which want to do tail call
5152/// optimization should implement this function.
5153bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5154 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5155 CallingConv::ID CallerCC, bool isVarArg,
5156 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5157 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5158 return false;
5159
5160 // Variable argument functions are not supported.
5161 if (isVarArg)
5162 return false;
5163
5164 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5165 // Functions containing by val parameters are not supported.
5166 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5167 return false;
5168
5169 // Non-PIC/GOT tail calls are supported.
5170 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5171 return true;
5172
5173 // At the moment we can only do local tail calls (in same module, hidden
5174 // or protected) if we are generating PIC.
5175 if (CalleeGV)
5176 return CalleeGV->hasHiddenVisibility() ||
5177 CalleeGV->hasProtectedVisibility();
5178 }
5179
5180 return false;
5181}
5182
5183/// isBLACompatibleAddress - Return the immediate to use if the specified
5184/// 32-bit value is representable in the immediate field of a BxA instruction.
5185static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5186 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5187 if (!C) return nullptr;
5188
5189 int Addr = C->getZExtValue();
5190 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5191 SignExtend32<26>(Addr) != Addr)
5192 return nullptr; // Top 6 bits have to be sext of immediate.
5193
5194 return DAG
5195 .getConstant(
5196 (int)C->getZExtValue() >> 2, SDLoc(Op),
5197 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5198 .getNode();
5199}
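// A minimal illustration (not from the source file) of the check above: an
// absolute branch target must be word-aligned and must survive 26-bit sign
// extension, because the I-form branch encodes a 24-bit signed word offset.
//
//   bool fitsBLAImmediate(int32_t Addr) {   // hypothetical helper
//     return (Addr & 3) == 0 && SignExtend32<26>(Addr) == Addr;
//   }
//   // fitsBLAImmediate(0x01FFFFFC) -> true  (2^25 - 4 survives sign extension)
//   // fitsBLAImmediate(0x02000000) -> false (bit 25 set, sign extension flips it)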
5200
5201namespace {
5202
5203struct TailCallArgumentInfo {
5204 SDValue Arg;
5205 SDValue FrameIdxOp;
5206 int FrameIdx = 0;
5207
5208 TailCallArgumentInfo() = default;
5209};
5210
5211} // end anonymous namespace
5212
5213/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5215 SelectionDAG &DAG, SDValue Chain,
5216 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5217 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5218 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5219 SDValue Arg = TailCallArgs[i].Arg;
5220 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5221 int FI = TailCallArgs[i].FrameIdx;
5222 // Store relative to framepointer.
5223 MemOpChains.push_back(DAG.getStore(
5224 Chain, dl, Arg, FIN,
5225 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5226 }
5227}
5228
5229/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5230/// the appropriate stack slot for the tail call optimized function call.
5232 SDValue OldRetAddr, SDValue OldFP,
5233 int SPDiff, const SDLoc &dl) {
5234 if (SPDiff) {
5235 // Calculate the new stack slot for the return address.
5237 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5238 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5239 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5240 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5241 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5242 NewRetAddrLoc, true);
5243 SDValue NewRetAddrFrIdx =
5244 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5245 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5246 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5247 }
5248 return Chain;
5249}
5250
5251/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5252/// the position of the argument.
5254 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5255 int SPDiff, unsigned ArgOffset,
5256 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5257 int Offset = ArgOffset + SPDiff;
5258 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5259 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5260 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5261 SDValue FIN = DAG.getFrameIndex(FI, VT);
5262 TailCallArgumentInfo Info;
5263 Info.Arg = Arg;
5264 Info.FrameIdxOp = FIN;
5265 Info.FrameIdx = FI;
5266 TailCallArguments.push_back(Info);
5267}
5268
5269/// EmitTailCallLoadFPAndRetAddr - Emit loads of the return address and frame
5270/// pointer from their stack slots. Returns the chain as result and the loaded
5271/// values in LROpOut/FPOpOut. Used when tail calling.
5272SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5273 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5274 SDValue &FPOpOut, const SDLoc &dl) const {
5275 if (SPDiff) {
5276 // Load the LR and FP stack slot for later adjusting.
5277 LROpOut = getReturnAddrFrameIndex(DAG);
5278 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5280 Chain = SDValue(LROpOut.getNode(), 1);
5281 }
5282 return Chain;
5283}
5284
5285/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5286/// by "Src" to address "Dst" of size "Size". Alignment information is
5287/// specified by the specific parameter attribute. The copy will be passed as
5288/// a byval function parameter.
5289/// Sometimes what we are copying is the end of a larger object, the part that
5290/// does not fit in registers.
5292 SDValue Chain, ISD::ArgFlagsTy Flags,
5293 SelectionDAG &DAG, const SDLoc &dl) {
5294 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5295 return DAG.getMemcpy(
5296 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5297 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5298}
5299
5300/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5301/// tail calls.
5303 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5304 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5305 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5306 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5308 if (!isTailCall) {
5309 if (isVector) {
5310 SDValue StackPtr;
5311 if (isPPC64)
5312 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5313 else
5314 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5315 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5316 DAG.getConstant(ArgOffset, dl, PtrVT));
5317 }
5318 MemOpChains.push_back(
5319 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5320 // Calculate and remember argument location.
5321 } else
5322 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5323 TailCallArguments);
5324}
5325
5326static void
5328 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5329 SDValue FPOp,
5330 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5331 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5332 // might overwrite each other in case of tail call optimization.
5333 SmallVector<SDValue, 8> MemOpChains2;
5334 // Do not flag preceding copytoreg stuff together with the following stuff.
5335 InGlue = SDValue();
5336 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5337 MemOpChains2, dl);
5338 if (!MemOpChains2.empty())
5339 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5340
5341 // Store the return address to the appropriate stack slot.
5342 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5343
5344 // Emit callseq_end just before tailcall node.
5345 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5346 InGlue = Chain.getValue(1);
5347}
5348
5349// Is this global address that of a function that can be called by name? (as
5350// opposed to something that must hold a descriptor for an indirect call).
5351static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5352 if (GV) {
5353 if (GV->isThreadLocal())
5354 return false;
5355
5356 return GV->getValueType()->isFunctionTy();
5357 }
5358
5359 return false;
5360}
5361
5362SDValue PPCTargetLowering::LowerCallResult(
5363 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5364 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5365 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5367 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5368 *DAG.getContext());
5369
5370 CCRetInfo.AnalyzeCallResult(
5371 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5372 ? RetCC_PPC_Cold
5373 : RetCC_PPC);
5374
5375 // Copy all of the result registers out of their specified physreg.
5376 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5377 CCValAssign &VA = RVLocs[i];
5378 assert(VA.isRegLoc() && "Can only return in registers!");
5379
5380 SDValue Val;
5381
5382 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5383 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5384 InGlue);
5385 Chain = Lo.getValue(1);
5386 InGlue = Lo.getValue(2);
5387 VA = RVLocs[++i]; // skip ahead to next loc
5388 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5389 InGlue);
5390 Chain = Hi.getValue(1);
5391 InGlue = Hi.getValue(2);
5392 if (!Subtarget.isLittleEndian())
5393 std::swap (Lo, Hi);
5394 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5395 } else {
5396 Val = DAG.getCopyFromReg(Chain, dl,
5397 VA.getLocReg(), VA.getLocVT(), InGlue);
5398 Chain = Val.getValue(1);
5399 InGlue = Val.getValue(2);
5400 }
5401
5402 switch (VA.getLocInfo()) {
5403 default: llvm_unreachable("Unknown loc info!");
5404 case CCValAssign::Full: break;
5405 case CCValAssign::AExt:
5406 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5407 break;
5408 case CCValAssign::ZExt:
5409 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5410 DAG.getValueType(VA.getValVT()));
5411 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5412 break;
5413 case CCValAssign::SExt:
5414 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5415 DAG.getValueType(VA.getValVT()));
5416 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5417 break;
5418 }
5419
5420 InVals.push_back(Val);
5421 }
5422
5423 return Chain;
5424}
5425
5426static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5427 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5428 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5429 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5430
5431 // PatchPoint calls are not indirect.
5432 if (isPatchPoint)
5433 return false;
5434
5435 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5436 return false;
5437
5438 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5439 // because the immediate function pointer points to a descriptor instead of
5440 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5441 // pointer immediate points to the global entry point, while the BLA would
5442 // need to jump to the local entry point (see rL211174).
5443 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5444 isBLACompatibleAddress(Callee, DAG))
5445 return false;
5446
5447 return true;
5448}
5449
5450// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5451static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5452 return Subtarget.isAIXABI() ||
5453 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5454}
5455
5457 const Function &Caller, const SDValue &Callee,
5458 const PPCSubtarget &Subtarget,
5459 const TargetMachine &TM,
5460 bool IsStrictFPCall = false) {
5461 if (CFlags.IsTailCall)
5462 return PPCISD::TC_RETURN;
5463
5464 unsigned RetOpc = 0;
5465 // This is a call through a function pointer.
5466 if (CFlags.IsIndirect) {
5467 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5468 // indirect calls. The save of the caller's TOC pointer to the stack will be
5469 // inserted into the DAG as part of call lowering. The restore of the TOC
5470 // pointer is modeled by using a pseudo instruction for the call opcode that
5471 // represents the 2 instruction sequence of an indirect branch and link,
5472 // immediately followed by a load of the TOC pointer from the stack save
5473 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5474 // as it is not saved or used.
5475 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5476 : PPCISD::BCTRL;
5477 } else if (Subtarget.isUsingPCRelativeCalls()) {
5478 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5479 RetOpc = PPCISD::CALL_NOTOC;
5480 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5481 // The ABIs that maintain a TOC pointer across calls need to have a nop
5482 // immediately following the call instruction if the caller and callee may
5483 // have different TOC bases. At link time if the linker determines the calls
5484 // may not share a TOC base, the call is redirected to a trampoline inserted
5485 // by the linker. The trampoline will (among other things) save the caller's
5486 // TOC pointer at an ABI designated offset in the linkage area and the
5487 // linker will rewrite the nop to be a load of the TOC pointer from the
5488 // linkage area into gpr2.
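// Illustrative sketch (not from the source file) of the two direct-call forms
// this chooses between:
//
//   bl callee              # PPCISD::CALL     - TOC known to be shared
//
//   bl callee              # PPCISD::CALL_NOP - TOC bases may differ
//   nop                    # linker may rewrite this to a TOC restore such as
//                          # 'ld 2, 24(1)' on ELFv2 when it inserts a call stub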
5489 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5490 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5491 RetOpc =
5492 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5493 } else
5494 RetOpc = PPCISD::CALL;
5495 if (IsStrictFPCall) {
5496 switch (RetOpc) {
5497 default:
5498 llvm_unreachable("Unknown call opcode");
5499 case PPCISD::BCTRL_LOAD_TOC:
5500 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5501 break;
5502 case PPCISD::BCTRL:
5503 RetOpc = PPCISD::BCTRL_RM;
5504 break;
5505 case PPCISD::CALL_NOTOC:
5506 RetOpc = PPCISD::CALL_NOTOC_RM;
5507 break;
5508 case PPCISD::CALL:
5509 RetOpc = PPCISD::CALL_RM;
5510 break;
5511 case PPCISD::CALL_NOP:
5512 RetOpc = PPCISD::CALL_NOP_RM;
5513 break;
5514 }
5515 }
5516 return RetOpc;
5517}
5518
5519static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5520 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5521 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5522 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5523 return SDValue(Dest, 0);
5524
5525 // Returns true if the callee is local, and false otherwise.
5526 auto isLocalCallee = [&]() {
5527 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5528 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5529
5530 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5531 !isa_and_nonnull<GlobalIFunc>(GV);
5532 };
5533
5534 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5535 // a static relocation model causes some versions of GNU LD (2.17.50, at
5536 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5537 // built with secure-PLT.
5538 bool UsePlt =
5539 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5541
5542 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5543 const TargetMachine &TM = Subtarget.getTargetMachine();
5544 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5545 MCSymbolXCOFF *S =
5546 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5547
5549 return DAG.getMCSymbol(S, PtrVT);
5550 };
5551
5552 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5553 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5554 if (isFunctionGlobalAddress(GV)) {
5555 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5556
5557 if (Subtarget.isAIXABI()) {
5558 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5559 return getAIXFuncEntryPointSymbolSDNode(GV);
5560 }
5561 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5562 UsePlt ? PPCII::MO_PLT : 0);
5563 }
5564
5565 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5566 const char *SymName = S->getSymbol();
5567 if (Subtarget.isAIXABI()) {
5568 // If there exists a user-declared function whose name is the same as the
5569 // ExternalSymbol's, then we pick up the user-declared version.
5571 if (const Function *F =
5572 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5573 return getAIXFuncEntryPointSymbolSDNode(F);
5574
5575 // On AIX, direct function calls reference the symbol for the function's
5576 // entry point, which is named by prepending a "." before the function's
5577 // C-linkage name. A Qualname is returned here because an external
5578 // function entry point is a csect with XTY_ER property.
5579 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5580 auto &Context = DAG.getMachineFunction().getContext();
5581 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5582 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5584 return Sec->getQualNameSymbol();
5585 };
5586
5587 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5588 }
5589 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5590 UsePlt ? PPCII::MO_PLT : 0);
5591 }
5592
5593 // No transformation needed.
5594 assert(Callee.getNode() && "What no callee?");
5595 return Callee;
5596}
5597
5599 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5600 "Expected a CALLSEQ_STARTSDNode.");
5601
5602 // The last operand is the chain, except when the node has glue. If the node
5603 // has glue, then the last operand is the glue, and the chain is the second
5604 // last operand.
5605 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5606 if (LastValue.getValueType() != MVT::Glue)
5607 return LastValue;
5608
5609 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5610}
5611
5612// Creates the node that moves a function's address into the count register
5613// to prepare for an indirect call instruction.
5614static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5615 SDValue &Glue, SDValue &Chain,
5616 const SDLoc &dl) {
5617 SDValue MTCTROps[] = {Chain, Callee, Glue};
5618 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5619 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5620 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5621 // The glue is the second value produced.
5622 Glue = Chain.getValue(1);
5623}
5624
5626 SDValue &Glue, SDValue &Chain,
5627 SDValue CallSeqStart,
5628 const CallBase *CB, const SDLoc &dl,
5629 bool hasNest,
5630 const PPCSubtarget &Subtarget) {
5631 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5632 // entry point, but to the function descriptor (the function entry point
5633 // address is part of the function descriptor though).
5634 // The function descriptor is a three doubleword structure with the
5635 // following fields: function entry point, TOC base address and
5636 // environment pointer.
5637 // Thus for a call through a function pointer, the following actions need
5638 // to be performed:
5639 // 1. Save the TOC of the caller in the TOC save area of its stack
5640 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5641 // 2. Load the address of the function entry point from the function
5642 // descriptor.
5643 // 3. Load the TOC of the callee from the function descriptor into r2.
5644 // 4. Load the environment pointer from the function descriptor into
5645 // r11.
5646 // 5. Branch to the function entry point address.
5647 // 6. On return of the callee, the TOC of the caller needs to be
5648 // restored (this is done in FinishCall()).
5649 //
5650 // The loads are scheduled at the beginning of the call sequence, and the
5651 // register copies are flagged together to ensure that no other
5652 // operations can be scheduled in between. E.g. without flagging the
5653 // copies together, a TOC access in the caller could be scheduled between
5654 // the assignment of the callee TOC and the branch to the callee, which leads
5655 // to incorrect code.
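// Rough picture (not from the source file) of the descriptor being read below;
// the struct and field names are illustrative only:
//
//   struct OpaqueFunctionDescriptor {
//     void *EntryPoint; // offset 0:        target of the final branch
//     void *TOCBase;    // TOCAnchorOffset: copied into the TOC register
//     void *EnvPtr;     // EnvPtrOffset:    copied into the environment register
//   };                  //                  unless a 'nest' parameter is used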
5656
5657 // Start by loading the function address from the descriptor.
5658 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5659 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5660 ? (MachineMemOperand::MODereferenceable |
5661 MachineMemOperand::MOInvariant)
5662 : MachineMemOperand::MONone;
5663
5664 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5665
5666 // Registers used in building the DAG.
5667 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5668 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5669
5670 // Offsets of descriptor members.
5671 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5672 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5673
5674 const MVT RegVT = Subtarget.getScalarIntVT();
5675 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5676
5677 // One load for the functions entry point address.
5678 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5679 Alignment, MMOFlags);
5680
5681 // One for loading the TOC anchor for the module that contains the called
5682 // function.
5683 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5684 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5685 SDValue TOCPtr =
5686 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5687 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5688
5689 // One for loading the environment pointer.
5690 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5691 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5692 SDValue LoadEnvPtr =
5693 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5694 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5695
5696
5697 // Then copy the newly loaded TOC anchor to the TOC pointer.
5698 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5699 Chain = TOCVal.getValue(0);
5700 Glue = TOCVal.getValue(1);
5701
5702 // If the function call has an explicit 'nest' parameter, it takes the
5703 // place of the environment pointer.
5704 assert((!hasNest || !Subtarget.isAIXABI()) &&
5705 "Nest parameter is not supported on AIX.");
5706 if (!hasNest) {
5707 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5708 Chain = EnvVal.getValue(0);
5709 Glue = EnvVal.getValue(1);
5710 }
5711
5712 // The rest of the indirect call sequence is the same as the non-descriptor
5713 // DAG.
5714 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5715}
5716
5717static void
5719 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5720 SelectionDAG &DAG,
5721 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5722 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5723 const PPCSubtarget &Subtarget) {
5724 const bool IsPPC64 = Subtarget.isPPC64();
5725 // MVT for a general purpose register.
5726 const MVT RegVT = Subtarget.getScalarIntVT();
5727
5728 // First operand is always the chain.
5729 Ops.push_back(Chain);
5730
5731 // If it's a direct call pass the callee as the second operand.
5732 if (!CFlags.IsIndirect)
5733 Ops.push_back(Callee);
5734 else {
5735 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5736
5737 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5738 // on the stack (this would have been done in `LowerCall_64SVR4` or
5739 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5740 // represents both the indirect branch and a load that restores the TOC
5741 // pointer from the linkage area. The operand for the TOC restore is an add
5742 // of the TOC save offset to the stack pointer. This must be the second
5743 // operand: after the chain input but before any other variadic arguments.
5744 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5745 // saved or used.
5746 if (isTOCSaveRestoreRequired(Subtarget)) {
5747 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5748
5749 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5750 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5751 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5752 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5753 Ops.push_back(AddTOC);
5754 }
5755
5756 // Add the register used for the environment pointer.
5757 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5758 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5759 RegVT));
5760
5761
5762 // Add CTR register as callee so a bctr can be emitted later.
5763 if (CFlags.IsTailCall)
5764 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5765 }
5766
5767 // If this is a tail call add stack pointer delta.
5768 if (CFlags.IsTailCall)
5769 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5770
5771 // Add argument registers to the end of the list so that they are known live
5772 // into the call.
5773 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5774 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5775 RegsToPass[i].second.getValueType()));
5776
5777 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5778 // no way to mark dependencies as implicit here.
5779 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5780 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5781 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5782 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5783
5784 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5785 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5786 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5787
5788 // Add a register mask operand representing the call-preserved registers.
5789 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5790 const uint32_t *Mask =
5791 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5792 assert(Mask && "Missing call preserved mask for calling convention");
5793 Ops.push_back(DAG.getRegisterMask(Mask));
5794
5795 // If the glue is valid, it is the last operand.
5796 if (Glue.getNode())
5797 Ops.push_back(Glue);
5798}
5799
5800SDValue PPCTargetLowering::FinishCall(
5801 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5802 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5803 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5804 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5805 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5806
5807 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5808 Subtarget.isAIXABI())
5809 setUsesTOCBasePtr(DAG);
5810
5811 unsigned CallOpc =
5812 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5813 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5814
5815 if (!CFlags.IsIndirect)
5816 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5817 else if (Subtarget.usesFunctionDescriptors())
5818 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5819 dl, CFlags.HasNest, Subtarget);
5820 else
5821 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5822
5823 // Build the operand list for the call instruction.
5825 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5826 SPDiff, Subtarget);
5827
5828 // Emit tail call.
5829 if (CFlags.IsTailCall) {
5830 // Indirect tail call when using PC Relative calls do not have the same
5831 // constraints.
5832 assert(((Callee.getOpcode() == ISD::Register &&
5833 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5834 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5835 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5836 isa<ConstantSDNode>(Callee) ||
5837 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5838 "Expecting a global address, external symbol, absolute value, "
5839 "register or an indirect tail call when PC Relative calls are "
5840 "used.");
5841 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5842 assert(CallOpc == PPCISD::TC_RETURN &&
5843 "Unexpected call opcode for a tail call.");
5845 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5846 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5847 return Ret;
5848 }
5849
5850 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5851 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5852 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5853 Glue = Chain.getValue(1);
5854
5855 // When performing tail call optimization the callee pops its arguments off
5856 // the stack. Account for this here so these bytes can be pushed back on in
5857 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5858 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5859 getTargetMachine().Options.GuaranteedTailCallOpt)
5860 ? NumBytes
5861 : 0;
5862
5863 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5864 Glue = Chain.getValue(1);
5865
5866 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5867 DAG, InVals);
5868}
5869
5870bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5871 CallingConv::ID CalleeCC = CB->getCallingConv();
5872 const Function *CallerFunc = CB->getCaller();
5873 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5874 const Function *CalleeFunc = CB->getCalledFunction();
5875 if (!CalleeFunc)
5876 return false;
5877 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5878
5879 SmallVector<ISD::OutputArg, 2> Outs;
5880 SmallVector<ISD::InputArg, 2> Ins;
5881
5882 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5883 CalleeFunc->getAttributes(), Outs, *this,
5884 CalleeFunc->getDataLayout());
5885
5886 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5887 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5888 false /*isCalleeExternalSymbol*/);
5889}
5890
5891bool PPCTargetLowering::isEligibleForTCO(
5892 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5893 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5895 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5896 bool isCalleeExternalSymbol) const {
5897 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5898 return false;
5899
5900 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5901 return IsEligibleForTailCallOptimization_64SVR4(
5902 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5903 isCalleeExternalSymbol);
5904 else
5905 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5906 isVarArg, Ins);
5907}
5908
5909SDValue
5910PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5911 SmallVectorImpl<SDValue> &InVals) const {
5912 SelectionDAG &DAG = CLI.DAG;
5913 SDLoc &dl = CLI.DL;
5915 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5917 SDValue Chain = CLI.Chain;
5918 SDValue Callee = CLI.Callee;
5919 bool &isTailCall = CLI.IsTailCall;
5920 CallingConv::ID CallConv = CLI.CallConv;
5921 bool isVarArg = CLI.IsVarArg;
5922 bool isPatchPoint = CLI.IsPatchPoint;
5923 const CallBase *CB = CLI.CB;
5924
5925 if (isTailCall) {
5927 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5928 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5929 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5930 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5931
5932 isTailCall =
5933 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5934 &(MF.getFunction()), IsCalleeExternalSymbol);
5935 if (isTailCall) {
5936 ++NumTailCalls;
5937 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5938 ++NumSiblingCalls;
5939
5940 // PC Relative calls no longer guarantee that the callee is a Global
5941 // Address Node. The callee could be an indirect tail call in which
5942 // case the SDValue for the callee could be a load (to load the address
5943 // of a function pointer) or it may be a register copy (to move the
5944 // address of the callee from a function parameter into a virtual
5945 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5946 assert((Subtarget.isUsingPCRelativeCalls() ||
5947 isa<GlobalAddressSDNode>(Callee)) &&
5948 "Callee should be an llvm::Function object.");
5949
5950 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5951 << "\nTCO callee: ");
5952 LLVM_DEBUG(Callee.dump());
5953 }
5954 }
5955
5956 if (!isTailCall && CB && CB->isMustTailCall())
5957 report_fatal_error("failed to perform tail call elimination on a call "
5958 "site marked musttail");
5959
5960 // When long calls (i.e. indirect calls) are always used, calls are always
5961 // made via function pointer. If we have a function name, first translate it
5962 // into a pointer.
5963 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5964 !isTailCall)
5965 Callee = LowerGlobalAddress(Callee, DAG);
5966
5967 CallFlags CFlags(
5968 CallConv, isTailCall, isVarArg, isPatchPoint,
5969 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5970 // hasNest
5971 Subtarget.is64BitELFABI() &&
5972 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5973 CLI.NoMerge);
5974
5975 if (Subtarget.isAIXABI())
5976 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5977 InVals, CB);
5978
5979 assert(Subtarget.isSVR4ABI());
5980 if (Subtarget.isPPC64())
5981 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5982 InVals, CB);
5983 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5984 InVals, CB);
5985}
5986
5987SDValue PPCTargetLowering::LowerCall_32SVR4(
5988 SDValue Chain, SDValue Callee, CallFlags CFlags,
5990 const SmallVectorImpl<SDValue> &OutVals,
5991 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5993 const CallBase *CB) const {
5994 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5995 // of the 32-bit SVR4 ABI stack frame layout.
5996
5997 const CallingConv::ID CallConv = CFlags.CallConv;
5998 const bool IsVarArg = CFlags.IsVarArg;
5999 const bool IsTailCall = CFlags.IsTailCall;
6000
6001 assert((CallConv == CallingConv::C ||
6002 CallConv == CallingConv::Cold ||
6003 CallConv == CallingConv::Fast) && "Unknown calling convention!");
6004
6005 const Align PtrAlign(4);
6006
6008
6009 // Mark this function as potentially containing a function that contains a
6010 // tail call. As a consequence the frame pointer will be used for dynamic
6011 // alloc and for restoring the caller's stack pointer in this function's
6012 // epilog. This is done because by tail calling, the called function might
6013 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6014 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6015 CallConv == CallingConv::Fast)
6016 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6017
6018 // Count how many bytes are to be pushed on the stack, including the linkage
6019 // area, parameter list area and the part of the local variable space which
6020 // contains copies of aggregates which are passed by value.
6021
6022 // Assign locations to all of the outgoing arguments.
6024 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6025
6026 // Reserve space for the linkage area on the stack.
6027 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6028 PtrAlign);
6029 if (useSoftFloat())
6030 CCInfo.PreAnalyzeCallOperands(Outs);
6031
6032 if (IsVarArg) {
6033 // Handle fixed and variable vector arguments differently.
6034 // Fixed vector arguments go into registers as long as registers are
6035 // available. Variable vector arguments always go into memory.
6036 unsigned NumArgs = Outs.size();
6037
6038 for (unsigned i = 0; i != NumArgs; ++i) {
6039 MVT ArgVT = Outs[i].VT;
6040 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6041 bool Result;
6042
6043 if (Outs[i].IsFixed) {
6044 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6045 CCInfo);
6046 } else {
6047 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
6048 ArgFlags, CCInfo);
6049 }
6050
6051 if (Result) {
6052#ifndef NDEBUG
6053 errs() << "Call operand #" << i << " has unhandled type "
6054 << ArgVT << "\n";
6055#endif
6056 llvm_unreachable(nullptr);
6057 }
6058 }
6059 } else {
6060 // All arguments are treated the same.
6061 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6062 }
6063 CCInfo.clearWasPPCF128();
6064
6065 // Assign locations to all of the outgoing aggregate by value arguments.
6066 SmallVector<CCValAssign, 16> ByValArgLocs;
6067 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6068
6069 // Reserve stack space for the allocations in CCInfo.
6070 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6071
6072 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6073
6074 // Size of the linkage area, parameter list area and the part of the local
6075 // variable space where copies of aggregates which are passed by value are
6076 // stored.
6077 unsigned NumBytes = CCByValInfo.getStackSize();
6078
6079 // Calculate by how many bytes the stack has to be adjusted in case of tail
6080 // call optimization.
6081 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6082
6083 // Adjust the stack pointer for the new arguments...
6084 // These operations are automatically eliminated by the prolog/epilog pass
6085 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6086 SDValue CallSeqStart = Chain;
6087
6088 // Load the return address and frame pointer so it can be moved somewhere else
6089 // later.
6090 SDValue LROp, FPOp;
6091 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6092
6093 // Set up a copy of the stack pointer for use loading and storing any
6094 // arguments that may not fit in the registers available for argument
6095 // passing.
6096 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6097
6099 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6100 SmallVector<SDValue, 8> MemOpChains;
6101
6102 bool seenFloatArg = false;
6103 // Walk the register/memloc assignments, inserting copies/loads.
6104 // i - Tracks the index into the list of registers allocated for the call
6105 // RealArgIdx - Tracks the index into the list of actual function arguments
6106 // j - Tracks the index into the list of byval arguments
6107 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6108 i != e;
6109 ++i, ++RealArgIdx) {
6110 CCValAssign &VA = ArgLocs[i];
6111 SDValue Arg = OutVals[RealArgIdx];
6112 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6113
6114 if (Flags.isByVal()) {
6115 // Argument is an aggregate which is passed by value, thus we need to
6116 // create a copy of it in the local variable space of the current stack
6117 // frame (which is the stack frame of the caller) and pass the address of
6118 // this copy to the callee.
6119 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6120 CCValAssign &ByValVA = ByValArgLocs[j++];
6121 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6122
6123 // Memory reserved in the local variable space of the callers stack frame.
6124 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6125
6126 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6127 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6128 StackPtr, PtrOff);
6129
6130 // Create a copy of the argument in the local area of the current
6131 // stack frame.
6132 SDValue MemcpyCall =
6133 CreateCopyOfByValArgument(Arg, PtrOff,
6134 CallSeqStart.getNode()->getOperand(0),
6135 Flags, DAG, dl);
6136
6137 // This must go outside the CALLSEQ_START..END.
6138 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6139 SDLoc(MemcpyCall));
6140 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6141 NewCallSeqStart.getNode());
6142 Chain = CallSeqStart = NewCallSeqStart;
6143
6144 // Pass the address of the aggregate copy on the stack either in a
6145 // physical register or in the parameter list area of the current stack
6146 // frame to the callee.
6147 Arg = PtrOff;
6148 }
6149
6150 // When useCRBits() is true, there can be i1 arguments.
6151 // It is because getRegisterType(MVT::i1) => MVT::i1,
6152 // and for other integer types getRegisterType() => MVT::i32.
6153 // Extend i1 and ensure callee will get i32.
6154 if (Arg.getValueType() == MVT::i1)
6155 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6156 dl, MVT::i32, Arg);
6157
6158 if (VA.isRegLoc()) {
6159 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6160 // Put argument in a physical register.
6161 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6162 bool IsLE = Subtarget.isLittleEndian();
6163 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6164 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6165 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6166 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6167 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6168 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6169 SVal.getValue(0)));
6170 } else
6171 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6172 } else {
6173 // Put argument in the parameter list area of the current stack frame.
6174 assert(VA.isMemLoc());
6175 unsigned LocMemOffset = VA.getLocMemOffset();
6176
6177 if (!IsTailCall) {
6178 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6179 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6180 StackPtr, PtrOff);
6181
6182 MemOpChains.push_back(
6183 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6184 } else {
6185 // Calculate and remember argument location.
6186 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6187 TailCallArguments);
6188 }
6189 }
6190 }
6191
6192 if (!MemOpChains.empty())
6193 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6194
6195 // Build a sequence of copy-to-reg nodes chained together with token chain
6196 // and flag operands which copy the outgoing args into the appropriate regs.
6197 SDValue InGlue;
6198 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6199 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6200 RegsToPass[i].second, InGlue);
6201 InGlue = Chain.getValue(1);
6202 }
6203
6204 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6205 // registers.
6206 if (IsVarArg) {
6207 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6208 SDValue Ops[] = { Chain, InGlue };
6209
6210 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6211 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6212
6213 InGlue = Chain.getValue(1);
6214 }
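// Illustrative example (not from the source file): for a 32-bit SVR4 vararg
// call such as printf("%f\n", x), a floating-point argument lands in a
// register, so the caller emits 'creqv 6,6,6' (CR bit 6 set); a call like
// printf("%d\n", n) instead gets 'crxor 6,6,6' (CR bit 6 cleared).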
6215
6216 if (IsTailCall)
6217 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6218 TailCallArguments);
6219
6220 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6221 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6222}
6223
6224// Copy an argument into memory, being careful to do this outside the
6225// call sequence for the call to which the argument belongs.
6226SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6227 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6228 SelectionDAG &DAG, const SDLoc &dl) const {
6229 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6230 CallSeqStart.getNode()->getOperand(0),
6231 Flags, DAG, dl);
6232 // The MEMCPY must go outside the CALLSEQ_START..END.
6233 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6234 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6235 SDLoc(MemcpyCall));
6236 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6237 NewCallSeqStart.getNode());
6238 return NewCallSeqStart;
6239}
6240
6241SDValue PPCTargetLowering::LowerCall_64SVR4(
6242 SDValue Chain, SDValue Callee, CallFlags CFlags,
6244 const SmallVectorImpl<SDValue> &OutVals,
6245 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6247 const CallBase *CB) const {
6248 bool isELFv2ABI = Subtarget.isELFv2ABI();
6249 bool isLittleEndian = Subtarget.isLittleEndian();
6250 unsigned NumOps = Outs.size();
6251 bool IsSibCall = false;
6252 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6253
6254 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6255 unsigned PtrByteSize = 8;
6256
6258
6259 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6260 IsSibCall = true;
6261
6262 // Mark this function as potentially containing a function that contains a
6263 // tail call. As a consequence the frame pointer will be used for dynamic
6264 // alloc and for restoring the caller's stack pointer in this function's
6265 // epilog. This is done because by tail calling, the called function might
6266 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6267 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6268 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6269
6270 assert(!(IsFastCall && CFlags.IsVarArg) &&
6271 "fastcc not supported on varargs functions");
6272
6273 // Count how many bytes are to be pushed on the stack, including the linkage
6274 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6275 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6276 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6277 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6278 unsigned NumBytes = LinkageSize;
6279 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6280
6281 static const MCPhysReg GPR[] = {
6282 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6283 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6284 };
6285 static const MCPhysReg VR[] = {
6286 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6287 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6288 };
6289
6290 const unsigned NumGPRs = std::size(GPR);
6291 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6292 const unsigned NumVRs = std::size(VR);
6293
6294 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6295 // can be passed to the callee in registers.
6296 // For the fast calling convention, there is another check below.
6297 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6298 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6299 if (!HasParameterArea) {
6300 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6301 unsigned AvailableFPRs = NumFPRs;
6302 unsigned AvailableVRs = NumVRs;
6303 unsigned NumBytesTmp = NumBytes;
6304 for (unsigned i = 0; i != NumOps; ++i) {
6305 if (Outs[i].Flags.isNest()) continue;
6306 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6307 PtrByteSize, LinkageSize, ParamAreaSize,
6308 NumBytesTmp, AvailableFPRs, AvailableVRs))
6309 HasParameterArea = true;
6310 }
6311 }
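// Illustrative example (not from the source file): for an ELFv2 call such as
//   void callee(long a, long b, double d);   // two GPR args plus one FPR arg
// every argument fits in registers, so the parameter save area is omitted and
// NumBytes stays at the 32-byte linkage area. Vararg calls keep the area
// unconditionally, and fastcc calls are re-checked in the sizing loop below.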
6312
6313 // When using the fast calling convention, we don't provide backing for
6314 // arguments that will be in registers.
6315 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6316
6317 // Avoid allocating parameter area for fastcc functions if all the arguments
6318 // can be passed in the registers.
6319 if (IsFastCall)
6320 HasParameterArea = false;
6321
6322 // Add up all the space actually used.
6323 for (unsigned i = 0; i != NumOps; ++i) {
6324 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6325 EVT ArgVT = Outs[i].VT;
6326 EVT OrigVT = Outs[i].ArgVT;
6327
6328 if (Flags.isNest())
6329 continue;
6330
6331 if (IsFastCall) {
6332 if (Flags.isByVal()) {
6333 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6334 if (NumGPRsUsed > NumGPRs)
6335 HasParameterArea = true;
6336 } else {
6337 switch (ArgVT.getSimpleVT().SimpleTy) {
6338 default: llvm_unreachable("Unexpected ValueType for argument!");
6339 case MVT::i1:
6340 case MVT::i32:
6341 case MVT::i64:
6342 if (++NumGPRsUsed <= NumGPRs)
6343 continue;
6344 break;
6345 case MVT::v4i32:
6346 case MVT::v8i16:
6347 case MVT::v16i8:
6348 case MVT::v2f64:
6349 case MVT::v2i64:
6350 case MVT::v1i128:
6351 case MVT::f128:
6352 if (++NumVRsUsed <= NumVRs)
6353 continue;
6354 break;
6355 case MVT::v4f32:
6356 if (++NumVRsUsed <= NumVRs)
6357 continue;
6358 break;
6359 case MVT::f32:
6360 case MVT::f64:
6361 if (++NumFPRsUsed <= NumFPRs)
6362 continue;
6363 break;
6364 }
6365 HasParameterArea = true;
6366 }
6367 }
6368
6369 /* Respect alignment of argument on the stack. */
6370 auto Alignment =
6371 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6372 NumBytes = alignTo(NumBytes, Alignment);
6373
6374 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6375 if (Flags.isInConsecutiveRegsLast())
6376 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6377 }
6378
6379 unsigned NumBytesActuallyUsed = NumBytes;
6380
6381 // In the old ELFv1 ABI,
6382 // the prolog code of the callee may store up to 8 GPR argument registers to
6383 // the stack, allowing va_start to index over them in memory if it is varargs.
6384 // Because we cannot tell if this is needed on the caller side, we have to
6385 // conservatively assume that it is needed. As such, make sure we have at
6386 // least enough stack space for the caller to store the 8 GPRs.
6387 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6388 // really requires memory operands, e.g. a vararg function.
6389 if (HasParameterArea)
6390 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6391 else
6392 NumBytes = LinkageSize;
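// Worked example (not from the source file): on ELFv1 the linkage area is 48
// bytes, so any call that needs a parameter area reserves at least
// 48 + 8 * 8 = 112 bytes, even if the callee takes a single integer argument.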
6393
6394 // Tail call needs the stack to be aligned.
6395 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6396 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6397
6398 int SPDiff = 0;
6399
6400 // Calculate by how many bytes the stack has to be adjusted in case of tail
6401 // call optimization.
6402 if (!IsSibCall)
6403 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6404
6405 // To protect arguments on the stack from being clobbered in a tail call,
6406 // force all the loads to happen before doing any other lowering.
6407 if (CFlags.IsTailCall)
6408 Chain = DAG.getStackArgumentTokenFactor(Chain);
6409
6410 // Adjust the stack pointer for the new arguments...
6411 // These operations are automatically eliminated by the prolog/epilog pass
6412 if (!IsSibCall)
6413 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6414 SDValue CallSeqStart = Chain;
6415
6416 // Load the return address and frame pointer so it can be moved somewhere else
6417 // later.
6418 SDValue LROp, FPOp;
6419 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6420
6421 // Set up a copy of the stack pointer for use loading and storing any
6422 // arguments that may not fit in the registers available for argument
6423 // passing.
6424 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6425
6426 // Figure out which arguments are going to go in registers, and which in
6427 // memory. Also, if this is a vararg function, floating point arguments
6428 // must be stored to our stack, and loaded into integer regs as well, if
6429 // any integer regs are available for argument passing.
6430 unsigned ArgOffset = LinkageSize;
6431
6433 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6434
6435 SmallVector<SDValue, 8> MemOpChains;
6436 for (unsigned i = 0; i != NumOps; ++i) {
6437 SDValue Arg = OutVals[i];
6438 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6439 EVT ArgVT = Outs[i].VT;
6440 EVT OrigVT = Outs[i].ArgVT;
6441
6442 // PtrOff will be used to store the current argument to the stack if a
6443 // register cannot be found for it.
6444 SDValue PtrOff;
6445
6446 // We re-align the argument offset for each argument, except when using the
6447 // fast calling convention, when we need to make sure we do that only when
6448 // we'll actually use a stack slot.
6449 auto ComputePtrOff = [&]() {
6450 /* Respect alignment of argument on the stack. */
6451 auto Alignment =
6452 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6453 ArgOffset = alignTo(ArgOffset, Alignment);
6454
6455 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6456
6457 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6458 };
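// Worked example (not from the source file): with the 32-byte ELFv2 linkage
// area, three i64 arguments occupy offsets 32, 40 and 48; a following v4i32
// argument starts at ArgOffset = 56, which is rounded up to 64 here before its
// 16-byte stack slot is assigned.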
6459
6460 if (!IsFastCall) {
6461 ComputePtrOff();
6462
6463 /* Compute GPR index associated with argument offset. */
6464 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6465 GPR_idx = std::min(GPR_idx, NumGPRs);
6466 }
6467
6468 // Promote integers to 64-bit values.
6469 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6470 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6471 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6472 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6473 }
6474
6475 // FIXME memcpy is used way more than necessary. Correctness first.
6476 // Note: "by value" is code for passing a structure by value, not
6477 // basic types.
6478 if (Flags.isByVal()) {
6479 // Note: Size includes alignment padding, so
6480 // struct x { short a; char b; }
6481 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6482 // These are the proper values we need for right-justifying the
6483 // aggregate in a parameter register.
6484 unsigned Size = Flags.getByValSize();
6485
6486 // An empty aggregate parameter takes up no storage and no
6487 // registers.
6488 if (Size == 0)
6489 continue;
6490
6491 if (IsFastCall)
6492 ComputePtrOff();
6493
6494 // All aggregates smaller than 8 bytes must be passed right-justified.
6495 if (Size==1 || Size==2 || Size==4) {
6496 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6497 if (GPR_idx != NumGPRs) {
6498 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6499 MachinePointerInfo(), VT);
6500 MemOpChains.push_back(Load.getValue(1));
6501 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6502
6503 ArgOffset += PtrByteSize;
6504 continue;
6505 }
6506 }
6507
6508 if (GPR_idx == NumGPRs && Size < 8) {
6509 SDValue AddPtr = PtrOff;
6510 if (!isLittleEndian) {
6511 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6512 PtrOff.getValueType());
6513 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6514 }
6515 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6516 CallSeqStart,
6517 Flags, DAG, dl);
6518 ArgOffset += PtrByteSize;
6519 continue;
6520 }
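// Illustrative example (not from the source file): on big-endian, a 3-byte
// aggregate that has to go to memory is copied to PtrOff + (8 - 3), so its
// bytes occupy the low-order (rightmost) end of the parameter doubleword,
// matching the right-justified image it would have had in a GPR.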
6521 // Copy the object to the parameter save area if it cannot be entirely passed
6522 // by registers.
6523 // FIXME: we only need to copy the parts which need to be passed in
6524 // parameter save area. For the parts passed by registers, we don't need
6525 // to copy them to the stack although we need to allocate space for them
6526 // in parameter save area.
6527 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6528 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6529 CallSeqStart,
6530 Flags, DAG, dl);
6531
6532 // When a register is available, pass a small aggregate right-justified.
6533 if (Size < 8 && GPR_idx != NumGPRs) {
6534 // The easiest way to get this right-justified in a register
6535 // is to copy the structure into the rightmost portion of a
6536 // local variable slot, then load the whole slot into the
6537 // register.
6538 // FIXME: The memcpy seems to produce pretty awful code for
6539 // small aggregates, particularly for packed ones.
6540 // FIXME: It would be preferable to use the slot in the
6541 // parameter save area instead of a new local variable.
6542 SDValue AddPtr = PtrOff;
6543 if (!isLittleEndian) {
6544 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6545 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6546 }
6547 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6548 CallSeqStart,
6549 Flags, DAG, dl);
6550
6551 // Load the slot into the register.
6552 SDValue Load =
6553 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6554 MemOpChains.push_back(Load.getValue(1));
6555 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6556
6557 // Done with this argument.
6558 ArgOffset += PtrByteSize;
6559 continue;
6560 }
6561
6562 // For aggregates larger than PtrByteSize, copy the pieces of the
6563 // object that fit into registers from the parameter save area.
6564 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6565 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6566 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6567 if (GPR_idx != NumGPRs) {
6568 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6569 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6570 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6571 MachinePointerInfo(), ObjType);
6572
6573 MemOpChains.push_back(Load.getValue(1));
6574 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6575 ArgOffset += PtrByteSize;
6576 } else {
6577 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6578 break;
6579 }
6580 }
6581 continue;
6582 }
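// Worked example (not from the source file) for the loop above: a 20-byte
// aggregate with three GPRs still free is loaded as two full doublewords plus
// one 4-byte extending load (j = 0, 8, 16) and consumes 3 * PtrByteSize = 24
// bytes of parameter area; once the GPRs run out, ArgOffset simply skips past
// the remainder of the (already copied) object.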
6583
6584 switch (Arg.getSimpleValueType().SimpleTy) {
6585 default: llvm_unreachable("Unexpected ValueType for argument!");
6586 case MVT::i1:
6587 case MVT::i32:
6588 case MVT::i64:
6589 if (Flags.isNest()) {
6590 // The 'nest' parameter, if any, is passed in R11.
6591 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6592 break;
6593 }
6594
6595 // These can be scalar arguments or elements of an integer array type
6596 // passed directly. Clang may use those instead of "byval" aggregate
6597 // types to avoid forcing arguments to memory unnecessarily.
6598 if (GPR_idx != NumGPRs) {
6599 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6600 } else {
6601 if (IsFastCall)
6602 ComputePtrOff();
6603
6604 assert(HasParameterArea &&
6605 "Parameter area must exist to pass an argument in memory.");
6606 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6607 true, CFlags.IsTailCall, false, MemOpChains,
6608 TailCallArguments, dl);
6609 if (IsFastCall)
6610 ArgOffset += PtrByteSize;
6611 }
6612 if (!IsFastCall)
6613 ArgOffset += PtrByteSize;
6614 break;
6615 case MVT::f32:
6616 case MVT::f64: {
6617 // These can be scalar arguments or elements of a float array type
6618 // passed directly. The latter are used to implement ELFv2 homogenous
6619 // float aggregates.
6620
6621 // Named arguments go into FPRs first, and once they overflow, the
6622 // remaining arguments go into GPRs and then the parameter save area.
6623 // Unnamed arguments for vararg functions always go to GPRs and
6624 // then the parameter save area. For now, put all arguments to vararg
6625 // routines always in both locations (FPR *and* GPR or stack slot).
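// For example, a vararg f64 with both FPRs and GPRs still free is copied
// into the next FPR and, below, also bitcast to i64 and placed in the next
// GPR.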
6626 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6627 bool NeededLoad = false;
6628
6629 // First load the argument into the next available FPR.
6630 if (FPR_idx != NumFPRs)
6631 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6632
6633 // Next, load the argument into GPR or stack slot if needed.
6634 if (!NeedGPROrStack)
6635 ;
6636 else if (GPR_idx != NumGPRs && !IsFastCall) {
6637 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6638 // once we support fp <-> gpr moves.
6639
6640 // In the non-vararg case, this can only ever happen in the
6641 // presence of f32 array types, since otherwise we never run
6642 // out of FPRs before running out of GPRs.
6643 SDValue ArgVal;
6644
6645 // Double values are always passed in a single GPR.
6646 if (Arg.getValueType() != MVT::f32) {
6647 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6648
6649 // Non-array float values are extended and passed in a GPR.
6650 } else if (!Flags.isInConsecutiveRegs()) {
6651 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6652 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6653
6654 // If we have an array of floats, we collect every odd element
6655 // together with its predecessor into one GPR.
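// For example, for consecutive floats {f0, f1}, f1 is paired here with f0
// from the previous iteration; on big-endian targets f0 ends up in the high
// word and f1 in the low word of the combined i64.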
6656 } else if (ArgOffset % PtrByteSize != 0) {
6657 SDValue Lo, Hi;
6658 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6659 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6660 if (!isLittleEndian)
6661 std::swap(Lo, Hi);
6662 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6663
6664 // The final element, if even, goes into the first half of a GPR.
6665 } else if (Flags.isInConsecutiveRegsLast()) {
6666 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6667 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6668 if (!isLittleEndian)
6669 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6670 DAG.getConstant(32, dl, MVT::i32));
6671
6672 // Non-final even elements are skipped; they will be handled
6673 // together with the subsequent argument on the next go-around.
6674 } else
6675 ArgVal = SDValue();
6676
6677 if (ArgVal.getNode())
6678 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6679 } else {
6680 if (IsFastCall)
6681 ComputePtrOff();
6682
6683 // Single-precision floating-point values are mapped to the
6684 // second (rightmost) word of the stack doubleword.
6685 if (Arg.getValueType() == MVT::f32 &&
6686 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6687 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6688 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6689 }
6690
6691 assert(HasParameterArea &&
6692 "Parameter area must exist to pass an argument in memory.");
6693 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6694 true, CFlags.IsTailCall, false, MemOpChains,
6695 TailCallArguments, dl);
6696
6697 NeededLoad = true;
6698 }
6699 // When passing an array of floats, the array occupies consecutive
6700 // space in the argument area; only round up to the next doubleword
6701 // at the end of the array. Otherwise, each float takes 8 bytes.
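// For example, each f32 element of a homogeneous float array advances
// ArgOffset by 4, and the final element rounds it up to the next doubleword;
// a lone f32 or an f64 advances it by 8.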
6702 if (!IsFastCall || NeededLoad) {
6703 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6704 Flags.isInConsecutiveRegs()) ? 4 : 8;
6705 if (Flags.isInConsecutiveRegsLast())
6706 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6707 }
6708 break;
6709 }
6710 case MVT::v4f32:
6711 case MVT::v4i32:
6712 case MVT::v8i16:
6713 case MVT::v16i8:
6714 case MVT::v2f64:
6715 case MVT::v2i64:
6716 case MVT::v1i128:
6717 case MVT::f128:
6718 // These can be scalar arguments or elements of a vector array type
6719 // passed directly. The latter are used to implement ELFv2 homogenous
6720 // vector aggregates.
6721
6722 // For a varargs call, named arguments go into VRs or on the stack as
6723 // usual; unnamed arguments always go to the stack or the corresponding
6724 // GPRs when within range. For now, we always put the value in both
6725 // locations (or even all three).
6726 if (CFlags.IsVarArg) {
6727 assert(HasParameterArea &&
6728 "Parameter area must exist if we have a varargs call.");
6729 // We could elide this store in the case where the object fits
6730 // entirely in R registers. Maybe later.
6731 SDValue Store =
6732 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6733 MemOpChains.push_back(Store);
6734 if (VR_idx != NumVRs) {
6735 SDValue Load =
6736 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6737 MemOpChains.push_back(Load.getValue(1));
6738 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6739 }
6740 ArgOffset += 16;
6741 for (unsigned i=0; i<16; i+=PtrByteSize) {
6742 if (GPR_idx == NumGPRs)
6743 break;
6744 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6745 DAG.getConstant(i, dl, PtrVT));
6746 SDValue Load =
6747 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6748 MemOpChains.push_back(Load.getValue(1));
6749 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6750 }
6751 break;
6752 }
6753
6754 // Non-varargs Altivec params go into VRs or on the stack.
6755 if (VR_idx != NumVRs) {
6756 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6757 } else {
6758 if (IsFastCall)
6759 ComputePtrOff();
6760
6761 assert(HasParameterArea &&
6762 "Parameter area must exist to pass an argument in memory.");
6763 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6764 true, CFlags.IsTailCall, true, MemOpChains,
6765 TailCallArguments, dl);
6766 if (IsFastCall)
6767 ArgOffset += 16;
6768 }
6769
6770 if (!IsFastCall)
6771 ArgOffset += 16;
6772 break;
6773 }
6774 }
6775
6776 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6777 "mismatch in size of parameter area");
6778 (void)NumBytesActuallyUsed;
6779
6780 if (!MemOpChains.empty())
6781 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6782
6783 // Check if this is an indirect call (MTCTR/BCTRL).
6784 // See prepareDescriptorIndirectCall and buildCallOperands for more
6785 // information about calls through function pointers in the 64-bit SVR4 ABI.
6786 if (CFlags.IsIndirect) {
6787 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6788 // caller in the TOC save area.
6789 if (isTOCSaveRestoreRequired(Subtarget)) {
6790 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6791 // Load r2 into a virtual register and store it to the TOC save area.
6792 setUsesTOCBasePtr(DAG);
6793 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6794 // TOC save area offset.
6795 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6796 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6797 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6798 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6799 MachinePointerInfo::getStack(
6800 DAG.getMachineFunction(), TOCSaveOffset));
6801 }
6802 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6803 // This does not mean the MTCTR instruction must use R12; it's easier
6804 // to model this as an extra parameter, so do that.
6805 if (isELFv2ABI && !CFlags.IsPatchPoint)
6806 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6807 }
6808
6809 // Build a sequence of copy-to-reg nodes chained together with token chain
6810 // and flag operands which copy the outgoing args into the appropriate regs.
6811 SDValue InGlue;
6812 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6813 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6814 RegsToPass[i].second, InGlue);
6815 InGlue = Chain.getValue(1);
6816 }
6817
6818 if (CFlags.IsTailCall && !IsSibCall)
6819 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6820 TailCallArguments);
6821
6822 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6823 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6824}
6825
6826// Returns true when the shadow of a general purpose argument register
6827// in the parameter save area is aligned to at least 'RequiredAlign'.
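// For example, with a 16-byte aligned stack and a 48-byte 64-bit linkage
// area, X3's shadow starts 48 bytes in (16-byte aligned) while X4's shadow
// at 56 bytes is only 8-byte aligned; on 32-bit, R4's shadow at 28 bytes is
// only 4-byte aligned.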
6828static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6829 assert(RequiredAlign.value() <= 16 &&
6830 "Required alignment greater than stack alignment.");
6831 switch (Reg) {
6832 default:
6833 report_fatal_error("called on invalid register.");
6834 case PPC::R5:
6835 case PPC::R9:
6836 case PPC::X3:
6837 case PPC::X5:
6838 case PPC::X7:
6839 case PPC::X9:
6840 // These registers are 16-byte aligned, which is the strictest alignment
6841 // we can support.
6842 return true;
6843 case PPC::R3:
6844 case PPC::R7:
6845 case PPC::X4:
6846 case PPC::X6:
6847 case PPC::X8:
6848 case PPC::X10:
6849 // The shadow of these registers in the PSA is 8 byte aligned.
6850 return RequiredAlign <= 8;
6851 case PPC::R4:
6852 case PPC::R6:
6853 case PPC::R8:
6854 case PPC::R10:
6855 return RequiredAlign <= 4;
6856 }
6857}
6858
6859static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6860 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6861 CCState &S) {
6862 AIXCCState &State = static_cast<AIXCCState &>(S);
6863 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6864 State.getMachineFunction().getSubtarget());
6865 const bool IsPPC64 = Subtarget.isPPC64();
6866 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6867 const Align PtrAlign(PtrSize);
6868 const Align StackAlign(16);
6869 const MVT RegVT = Subtarget.getScalarIntVT();
6870
6871 if (ValVT == MVT::f128)
6872 report_fatal_error("f128 is unimplemented on AIX.");
6873
6874 if (ArgFlags.isNest())
6875 report_fatal_error("Nest arguments are unimplemented.");
6876
6877 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6878 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6879 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6880 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6881 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6882 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6883
6884 static const MCPhysReg VR[] = {// Vector registers.
6885 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6886 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6887 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6888
6889 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6890
6891 if (ArgFlags.isByVal()) {
6892 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6893 if (ByValAlign > StackAlign)
6894 report_fatal_error("Pass-by-value arguments with alignment greater than "
6895 "16 are not supported.");
6896
6897 const unsigned ByValSize = ArgFlags.getByValSize();
6898 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6899
6900 // An empty aggregate parameter takes up no storage and no registers,
6901 // but needs a MemLoc for a stack slot for the formal arguments side.
6902 if (ByValSize == 0) {
6903 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6904 State.getStackSize(), RegVT, LocInfo));
6905 return false;
6906 }
6907
6908 // Shadow allocate any registers that are not properly aligned.
6909 unsigned NextReg = State.getFirstUnallocated(GPRs);
6910 while (NextReg != GPRs.size() &&
6911 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6912 // Shadow allocate the next register since its alignment is not strict enough.
6913 MCRegister Reg = State.AllocateReg(GPRs);
6914 // Allocate the stack space shadowed by said register.
6915 State.AllocateStack(PtrSize, PtrAlign);
6916 assert(Reg && "Allocating register unexpectedly failed.");
6917 (void)Reg;
6918 NextReg = State.getFirstUnallocated(GPRs);
6919 }
6920
6921 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6922 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6923 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6924 if (MCRegister Reg = State.AllocateReg(GPRs))
6925 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6926 else {
6927 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6928 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6929 LocInfo));
6930 break;
6931 }
6932 }
6933 return false;
6934 }
6935
6936 // Arguments always reserve space in the parameter save area.
6937 switch (ValVT.SimpleTy) {
6938 default:
6939 report_fatal_error("Unhandled value type for argument.");
6940 case MVT::i64:
6941 // i64 arguments should have been split to i32 for PPC32.
6942 assert(IsPPC64 && "PPC32 should have split i64 values.");
6943 [[fallthrough]];
6944 case MVT::i1:
6945 case MVT::i32: {
6946 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6947 // AIX integer arguments are always passed in register width.
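// For example, an i32 on 64-bit AIX is widened to the 64-bit RegVT and
// marked sign- or zero-extended according to the argument flags below.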
6948 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6949 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6950 : CCValAssign::LocInfo::ZExt;
6951 if (MCRegister Reg = State.AllocateReg(GPRs))
6952 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6953 else
6954 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6955
6956 return false;
6957 }
6958 case MVT::f32:
6959 case MVT::f64: {
6960 // Parameter save area (PSA) is reserved even if the float is passed in an FPR.
6961 const unsigned StoreSize = LocVT.getStoreSize();
6962 // Floats are always 4-byte aligned in the PSA on AIX.
6963 // This includes f64 in 64-bit mode for ABI compatibility.
6964 const unsigned Offset =
6965 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6966 MCRegister FReg = State.AllocateReg(FPR);
6967 if (FReg)
6968 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6969
6970 // Reserve and initialize GPRs or initialize the PSA as required.
6971 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6972 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6973 assert(FReg && "An FPR should be available when a GPR is reserved.");
6974 if (State.isVarArg()) {
6975 // Successfully reserved GPRs are only initialized for vararg calls.
6976 // Custom handling is required for:
6977 // f64 in PPC32 needs to be split into 2 GPRs.
6978 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6979 State.addLoc(
6980 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6981 }
6982 } else {
6983 // If there are insufficient GPRs, the PSA needs to be initialized.
6984 // Initialization occurs even if an FPR was initialized, for
6985 // compatibility with the AIX XL compiler. The full memory for the
6986 // argument is initialized even if a prior word is saved in a GPR.
6987 // A custom MemLoc is used when the argument also passes in an FPR so
6988 // that the callee handling can skip over it easily.
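// For example, a vararg f64 on 32-bit AIX with only one GPR left typically
// gets an FPR, a custom RegLoc for that GPR, and a custom MemLoc covering
// the full 8-byte PSA slot for the remainder.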
6989 State.addLoc(
6990 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6991 LocInfo)
6992 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6993 break;
6994 }
6995 }
6996
6997 return false;
6998 }
6999 case MVT::v4f32:
7000 case MVT::v4i32:
7001 case MVT::v8i16:
7002 case MVT::v16i8:
7003 case MVT::v2i64:
7004 case MVT::v2f64:
7005 case MVT::v1i128: {
7006 const unsigned VecSize = 16;
7007 const Align VecAlign(VecSize);
7008
7009 if (!State.isVarArg()) {
7010 // If there are vector registers remaining we don't consume any stack
7011 // space.
7012 if (MCRegister VReg = State.AllocateReg(VR)) {
7013 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7014 return false;
7015 }
7016 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7017 // might be allocated in the portion of the PSA that is shadowed by the
7018 // GPRs.
7019 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7020 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7021 return false;
7022 }
7023
7024 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7025 // Burn any underaligned registers and their shadowed stack space until
7026 // we reach the required alignment.
7027 while (NextRegIndex != GPRs.size() &&
7028 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7029 // Shadow allocate register and its stack shadow.
7030 MCRegister Reg = State.AllocateReg(GPRs);
7031 State.AllocateStack(PtrSize, PtrAlign);
7032 assert(Reg && "Allocating register unexpectedly failed.");
7033 (void)Reg;
7034 NextRegIndex = State.getFirstUnallocated(GPRs);
7035 }
7036
7037 // Vectors that are passed as fixed arguments are handled differently.
7038 // They are passed in VRs if any are available (unlike arguments passed
7039 // through the ellipsis) and shadow GPRs (unlike arguments to non-vararg
7040 // functions).
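// For example, a fixed v4i32 passed to a 64-bit vararg callee while VRs
// remain takes the next VR and also shadow-allocates two GPRs and 16 bytes
// of parameter save area.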
7041 if (State.isFixed(ValNo)) {
7042 if (MCRegister VReg = State.AllocateReg(VR)) {
7043 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7044 // Shadow allocate GPRs and stack space even though we pass in a VR.
7045 for (unsigned I = 0; I != VecSize; I += PtrSize)
7046 State.AllocateReg(GPRs);
7047 State.AllocateStack(VecSize, VecAlign);
7048 return false;
7049 }
7050 // No vector registers remain so pass on the stack.
7051 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7052 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7053 return false;
7054 }
7055
7056 // If all GPRs are consumed, then we pass the argument fully on the stack.
7057 if (NextRegIndex == GPRs.size()) {
7058 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7059 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7060 return false;
7061 }
7062
7063 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7064 // half of the argument, and then need to pass the remaining half on the
7065 // stack.
7066 if (GPRs[NextRegIndex] == PPC::R9) {
7067 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7068 State.addLoc(
7069 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7070
7071 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
7072 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
7073 assert(FirstReg && SecondReg &&
7074 "Allocating R9 or R10 unexpectedly failed.");
7075 State.addLoc(
7076 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7077 State.addLoc(
7078 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7079 return false;
7080 }
7081
7082 // We have enough GPRs to fully pass the vector argument, and we have
7083 // already consumed any underaligned registers. Start with the custom
7084 // MemLoc and then the custom RegLocs.
7085 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7086 State.addLoc(
7087 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7088 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7089 const MCRegister Reg = State.AllocateReg(GPRs);
7090 assert(Reg && "Failed to allocate register for vararg vector argument");
7091 State.addLoc(
7092 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7093 }
7094 return false;
7095 }
7096 }
7097 return true;
7098}
7099
7100// So far, this function is only used by LowerFormalArguments_AIX()
7101static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7102 bool IsPPC64,
7103 bool HasP8Vector,
7104 bool HasVSX) {
7105 assert((IsPPC64 || SVT != MVT::i64) &&
7106 "i64 should have been split for 32-bit codegen.");
7107
7108 switch (SVT) {
7109 default:
7110 report_fatal_error("Unexpected value type for formal argument");
7111 case MVT::i1:
7112 case MVT::i32:
7113 case MVT::i64:
7114 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7115 case MVT::f32:
7116 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7117 case MVT::f64:
7118 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7119 case MVT::v4f32:
7120 case MVT::v4i32:
7121 case MVT::v8i16:
7122 case MVT::v16i8:
7123 case MVT::v2i64:
7124 case MVT::v2f64:
7125 case MVT::v1i128:
7126 return &PPC::VRRCRegClass;
7127 }
7128}
7129
7130static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7131 SelectionDAG &DAG, SDValue ArgValue,
7132 MVT LocVT, const SDLoc &dl) {
7133 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7134 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7135
7136 if (Flags.isSExt())
7137 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7138 DAG.getValueType(ValVT));
7139 else if (Flags.isZExt())
7140 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7141 DAG.getValueType(ValVT));
7142
7143 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7144}
7145
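// Maps a formal-argument GPR to the offset of its shadow in the caller's
// parameter save area; for example, on 64-bit AIX with a 48-byte linkage
// area, X3 maps to 48, X4 to 56, and X10 to 104.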
7146static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7147 const unsigned LASize = FL->getLinkageSize();
7148
7149 if (PPC::GPRCRegClass.contains(Reg)) {
7150 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7151 "Reg must be a valid argument register!");
7152 return LASize + 4 * (Reg - PPC::R3);
7153 }
7154
7155 if (PPC::G8RCRegClass.contains(Reg)) {
7156 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7157 "Reg must be a valid argument register!");
7158 return LASize + 8 * (Reg - PPC::X3);
7159 }
7160
7161 llvm_unreachable("Only general purpose registers expected.");
7162}
7163
7164// AIX ABI Stack Frame Layout:
7165//
7166// Low Memory +--------------------------------------------+
7167// SP +---> | Back chain | ---+
7168// | +--------------------------------------------+ |
7169// | | Saved Condition Register | |
7170// | +--------------------------------------------+ |
7171// | | Saved Linkage Register | |
7172// | +--------------------------------------------+ | Linkage Area
7173// | | Reserved for compilers | |
7174// | +--------------------------------------------+ |
7175// | | Reserved for binders | |
7176// | +--------------------------------------------+ |
7177// | | Saved TOC pointer | ---+
7178// | +--------------------------------------------+
7179// | | Parameter save area |
7180// | +--------------------------------------------+
7181// | | Alloca space |
7182// | +--------------------------------------------+
7183// | | Local variable space |
7184// | +--------------------------------------------+
7185// | | Float/int conversion temporary |
7186// | +--------------------------------------------+
7187// | | Save area for AltiVec registers |
7188// | +--------------------------------------------+
7189// | | AltiVec alignment padding |
7190// | +--------------------------------------------+
7191// | | Save area for VRSAVE register |
7192// | +--------------------------------------------+
7193// | | Save area for General Purpose registers |
7194// | +--------------------------------------------+
7195// | | Save area for Floating Point registers |
7196// | +--------------------------------------------+
7197// +---- | Back chain |
7198// High Memory +--------------------------------------------+
7199//
7200// Specifications:
7201// AIX 7.2 Assembler Language Reference
7202// Subroutine linkage convention
7203
7204SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7205 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7206 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7207 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7208
7209 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7210 CallConv == CallingConv::Fast) &&
7211 "Unexpected calling convention!");
7212
7213 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7214 report_fatal_error("Tail call support is unimplemented on AIX.");
7215
7216 if (useSoftFloat())
7217 report_fatal_error("Soft float support is unimplemented on AIX.");
7218
7219 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7220
7221 const bool IsPPC64 = Subtarget.isPPC64();
7222 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7223
7224 // Assign locations to all of the incoming arguments.
7225 SmallVector<CCValAssign, 16> ArgLocs;
7226 MachineFunction &MF = DAG.getMachineFunction();
7227 MachineFrameInfo &MFI = MF.getFrameInfo();
7228 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7229 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7230
7231 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7232 // Reserve space for the linkage area on the stack.
7233 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7234 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7235 uint64_t SaveStackPos = CCInfo.getStackSize();
7236 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7237 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7238
7239 SmallVector<SDValue, 8> MemOps;
7240
7241 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7242 CCValAssign &VA = ArgLocs[I++];
7243 MVT LocVT = VA.getLocVT();
7244 MVT ValVT = VA.getValVT();
7245 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7246
7247 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7248 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7249 // For compatibility with the AIX XL compiler, the float args in the
7250 // parameter save area are initialized even if the argument is available
7251 // in register. The caller is required to initialize both the register
7252 // and memory, however, the callee can choose to expect it in either.
7253 // The memloc is dismissed here because the argument is retrieved from
7254 // the register.
7255 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7256 continue;
7257
7258 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7259 const TargetRegisterClass *RegClass = getRegClassForSVT(
7260 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7261 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7262 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7263 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7264 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7265 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7266 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7267 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7268 MachinePointerInfo(), Align(PtrByteSize));
7269 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7270 MemOps.push_back(StoreReg);
7271 }
7272
7273 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7274 unsigned StoreSize =
7275 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7276 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7277 }
7278
7279 auto HandleMemLoc = [&]() {
7280 const unsigned LocSize = LocVT.getStoreSize();
7281 const unsigned ValSize = ValVT.getStoreSize();
7282 assert((ValSize <= LocSize) &&
7283 "Object size is larger than size of MemLoc");
7284 int CurArgOffset = VA.getLocMemOffset();
7285 // Objects are right-justified because AIX is big-endian.
7286 if (LocSize > ValSize)
7287 CurArgOffset += LocSize - ValSize;
7288 // Potential tail calls could cause overwriting of argument stack slots.
7289 const bool IsImmutable =
7290 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7291 (CallConv == CallingConv::Fast));
7292 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7293 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7294 SDValue ArgValue =
7295 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7296
7297 // While the ABI specifies the argument type is (sign or zero) extended
7298 // out to register width, not all code is compliant. We truncate and
7299 // re-extend to be more forgiving of these callers when the argument type
7300 // is smaller than register width.
7301 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7302 ValVT.isInteger() &&
7303 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7304 SDValue ArgValueTrunc = DAG.getNode(
7305 ISD::TRUNCATE, dl, ArgVT.getSimpleVT() == MVT::i1 ? MVT::i8 : ArgVT,
7306 ArgValue);
7307 SDValue ArgValueExt =
7308 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7309 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7310 InVals.push_back(ArgValueExt);
7311 } else {
7312 InVals.push_back(ArgValue);
7313 }
7314 };
7315
7316 // Vector arguments to VaArg functions are passed both on the stack, and
7317 // in any available GPRs. Load the value from the stack and add the GPRs
7318 // as live ins.
7319 if (VA.isMemLoc() && VA.needsCustom()) {
7320 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7321 assert(isVarArg && "Only use custom memloc for vararg.");
7322 // Remember the ValNo of the custom MemLoc so we can compare it to the
7323 // ValNo of the matching custom RegLocs.
7324 const unsigned OriginalValNo = VA.getValNo();
7325 (void)OriginalValNo;
7326
7327 auto HandleCustomVecRegLoc = [&]() {
7328 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7329 "Missing custom RegLoc.");
7330 VA = ArgLocs[I++];
7331 assert(VA.getValVT().isVector() &&
7332 "Unexpected Val type for custom RegLoc.");
7333 assert(VA.getValNo() == OriginalValNo &&
7334 "ValNo mismatch between custom MemLoc and RegLoc.");
7335 MVT::SimpleValueType SVT = VA.getValVT().SimpleTy;
7336 MF.addLiveIn(VA.getLocReg(),
7337 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7338 Subtarget.hasVSX()));
7339 };
7340
7341 HandleMemLoc();
7342 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7343 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7344 // R10.
7345 HandleCustomVecRegLoc();
7346 HandleCustomVecRegLoc();
7347
7348 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7349 // we passed the vector in R5, R6, R7 and R8.
7350 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7351 assert(!IsPPC64 &&
7352 "Only 2 custom RegLocs expected for 64-bit codegen.");
7353 HandleCustomVecRegLoc();
7354 HandleCustomVecRegLoc();
7355 }
7356
7357 continue;
7358 }
7359
7360 if (VA.isRegLoc()) {
7361 if (VA.getValVT().isScalarInteger())
7362 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7363 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7364 switch (VA.getValVT().SimpleTy) {
7365 default:
7366 report_fatal_error("Unhandled value type for argument.");
7367 case MVT::f32:
7368 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7369 break;
7370 case MVT::f64:
7371 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7372 break;
7373 }
7374 } else if (VA.getValVT().isVector()) {
7375 switch (VA.getValVT().SimpleTy) {
7376 default:
7377 report_fatal_error("Unhandled value type for argument.");
7378 case MVT::v16i8:
7379 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7380 break;
7381 case MVT::v8i16:
7382 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7383 break;
7384 case MVT::v4i32:
7385 case MVT::v2i64:
7386 case MVT::v1i128:
7387 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7388 break;
7389 case MVT::v4f32:
7390 case MVT::v2f64:
7391 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7392 break;
7393 }
7394 }
7395 }
7396
7397 if (Flags.isByVal() && VA.isMemLoc()) {
7398 const unsigned Size =
7399 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7400 PtrByteSize);
7401 const int FI = MF.getFrameInfo().CreateFixedObject(
7402 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7403 /* IsAliased */ true);
7404 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7405 InVals.push_back(FIN);
7406
7407 continue;
7408 }
7409
7410 if (Flags.isByVal()) {
7411 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7412
7413 const MCPhysReg ArgReg = VA.getLocReg();
7414 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7415
7416 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7417 const int FI = MF.getFrameInfo().CreateFixedObject(
7418 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7419 /* IsAliased */ true);
7420 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7421 InVals.push_back(FIN);
7422
7423 // Add live ins for all the RegLocs for the same ByVal.
7424 const TargetRegisterClass *RegClass =
7425 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7426
7427 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7428 unsigned Offset) {
7429 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7430 // Since the caller's side has left-justified the aggregate in the
7431 // register, we can simply store the entire register into the stack
7432 // slot.
7433 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7434 // The store to the fixed-stack object is needed because accessing a
7435 // field of the ByVal will use a GEP and load. Ideally we will optimize
7436 // to extracting the value from the register directly, and elide the
7437 // stores when the argument's address is not taken, but that will need to
7438 // be future work.
7439 SDValue Store = DAG.getStore(
7440 CopyFrom.getValue(1), dl, CopyFrom,
7441 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7442 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7443
7444 MemOps.push_back(Store);
7445 };
7446
7447 unsigned Offset = 0;
7448 HandleRegLoc(VA.getLocReg(), Offset);
7449 Offset += PtrByteSize;
7450 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7451 Offset += PtrByteSize) {
7452 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7453 "RegLocs should be for ByVal argument.");
7454
7455 const CCValAssign RL = ArgLocs[I++];
7456 HandleRegLoc(RL.getLocReg(), Offset);
7458 }
7459
7460 if (Offset != StackSize) {
7461 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7462 "Expected MemLoc for remaining bytes.");
7463 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7464 // Consume the MemLoc. The InVal has already been emitted, so nothing
7465 // more needs to be done.
7466 ++I;
7467 }
7468
7469 continue;
7470 }
7471
7472 if (VA.isRegLoc() && !VA.needsCustom()) {
7473 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7474 Register VReg =
7475 MF.addLiveIn(VA.getLocReg(),
7476 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7477 Subtarget.hasVSX()));
7478 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7479 if (ValVT.isScalarInteger() &&
7480 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7481 ArgValue =
7482 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7483 }
7484 InVals.push_back(ArgValue);
7485 continue;
7486 }
7487 if (VA.isMemLoc()) {
7488 HandleMemLoc();
7489 continue;
7490 }
7491 }
7492
7493 // On AIX a minimum of 8 words is saved to the parameter save area.
7494 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7495 // Area that is at least reserved in the caller of this function.
7496 unsigned CallerReservedArea = std::max<unsigned>(
7497 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7498
7499 // Set the size that is at least reserved in caller of this function. Tail
7500 // call optimized function's reserved stack space needs to be aligned so
7501 // that taking the difference between two stack areas will result in an
7502 // aligned stack.
7503 CallerReservedArea =
7504 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7505 FuncInfo->setMinReservedArea(CallerReservedArea);
7506
7507 if (isVarArg) {
7508 FuncInfo->setVarArgsFrameIndex(
7509 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7510 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7511
7512 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7513 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7514
7515 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7516 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7517 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7518
7519 // The fixed integer arguments of a variadic function are stored to the
7520 // VarArgsFrameIndex on the stack so that they may be loaded by
7521 // dereferencing the result of va_next.
7522 for (unsigned GPRIndex =
7523 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7524 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7525
7526 const Register VReg =
7527 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7528 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7529
7530 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7531 SDValue Store =
7532 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7533 MemOps.push_back(Store);
7534 // Increment the address for the next argument to store.
7535 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7536 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7537 }
7538 }
7539
7540 if (!MemOps.empty())
7541 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7542
7543 return Chain;
7544}
7545
7546SDValue PPCTargetLowering::LowerCall_AIX(
7547 SDValue Chain, SDValue Callee, CallFlags CFlags,
7548 const SmallVectorImpl<ISD::OutputArg> &Outs,
7549 const SmallVectorImpl<SDValue> &OutVals,
7550 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7551 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7552 const CallBase *CB) const {
7553 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7554 // AIX ABI stack frame layout.
7555
7556 assert((CFlags.CallConv == CallingConv::C ||
7557 CFlags.CallConv == CallingConv::Cold ||
7558 CFlags.CallConv == CallingConv::Fast) &&
7559 "Unexpected calling convention!");
7560
7561 if (CFlags.IsPatchPoint)
7562 report_fatal_error("This call type is unimplemented on AIX.");
7563
7564 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7565
7566 MachineFunction &MF = DAG.getMachineFunction();
7567 SmallVector<CCValAssign, 16> ArgLocs;
7568 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7569 *DAG.getContext());
7570
7571 // Reserve space for the linkage save area (LSA) on the stack.
7572 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7573 // [SP][CR][LR][2 x reserved][TOC].
7574 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7575 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7576 const bool IsPPC64 = Subtarget.isPPC64();
7577 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7578 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7579 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7580 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7581
7582 // The prolog code of the callee may store up to 8 GPR argument registers to
7583 // the stack, allowing va_start to index over them in memory if the callee
7584 // is variadic.
7585 // Because we cannot tell if this is needed on the caller side, we have to
7586 // conservatively assume that it is needed. As such, make sure we have at
7587 // least enough stack space for the caller to store the 8 GPRs.
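// For example, on 64-bit this floor is 48 + 8 * 8 = 112 bytes, even for a
// call whose arguments would otherwise occupy less of the save area.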
7588 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7589 const unsigned NumBytes = std::max<unsigned>(
7590 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7591
7592 // Adjust the stack pointer for the new arguments...
7593 // These operations are automatically eliminated by the prolog/epilog pass.
7594 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7595 SDValue CallSeqStart = Chain;
7596
7597 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7598 SmallVector<SDValue, 8> MemOpChains;
7599
7600 // Set up a copy of the stack pointer for loading and storing any
7601 // arguments that may not fit in the registers available for argument
7602 // passing.
7603 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7604 : DAG.getRegister(PPC::R1, MVT::i32);
7605
7606 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7607 const unsigned ValNo = ArgLocs[I].getValNo();
7608 SDValue Arg = OutVals[ValNo];
7609 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7610
7611 if (Flags.isByVal()) {
7612 const unsigned ByValSize = Flags.getByValSize();
7613
7614 // Nothing to do for zero-sized ByVals on the caller side.
7615 if (!ByValSize) {
7616 ++I;
7617 continue;
7618 }
7619
7620 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7621 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7622 (LoadOffset != 0)
7623 ? DAG.getObjectPtrOffset(
7624 dl, Arg, TypeSize::getFixed(LoadOffset))
7625 : Arg,
7626 MachinePointerInfo(), VT);
7627 };
7628
7629 unsigned LoadOffset = 0;
7630
7631 // Initialize registers, which are fully occupied by the by-val argument.
7632 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7633 SDValue Load = GetLoad(PtrVT, LoadOffset);
7634 MemOpChains.push_back(Load.getValue(1));
7635 LoadOffset += PtrByteSize;
7636 const CCValAssign &ByValVA = ArgLocs[I++];
7637 assert(ByValVA.getValNo() == ValNo &&
7638 "Unexpected location for pass-by-value argument.");
7639 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7640 }
7641
7642 if (LoadOffset == ByValSize)
7643 continue;
7644
7645 // There must be one more loc to handle the remainder.
7646 assert(ArgLocs[I].getValNo() == ValNo &&
7647 "Expected additional location for by-value argument.");
7648
7649 if (ArgLocs[I].isMemLoc()) {
7650 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7651 const CCValAssign &ByValVA = ArgLocs[I++];
7652 ISD::ArgFlagsTy MemcpyFlags = Flags;
7653 // Only memcpy the bytes that don't pass in register.
7654 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7655 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7656 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7657 dl, Arg, TypeSize::getFixed(LoadOffset))
7658 : Arg,
7659 DAG.getObjectPtrOffset(
7660 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7661 CallSeqStart, MemcpyFlags, DAG, dl);
7662 continue;
7663 }
7664
7665 // Initialize the final register residue.
7666 // Any residue that occupies the final by-val arg register must be
7667 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7668 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7669 // 2 and 1 byte loads.
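// Concretely, on 64-bit those loads are shifted left by 32, 16 and 8 bits
// respectively before being OR'ed together, leaving the 7 bytes
// left-justified in the register.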
7670 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7671 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7672 "Unexpected register residue for by-value argument.");
7673 SDValue ResidueVal;
7674 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7675 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7676 const MVT VT =
7677 N == 1 ? MVT::i8
7678 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7679 SDValue Load = GetLoad(VT, LoadOffset);
7680 MemOpChains.push_back(Load.getValue(1));
7681 LoadOffset += N;
7682 Bytes += N;
7683
7684 // By-val arguments are passed left-justified in register.
7685 // Every load here needs to be shifted, otherwise a full register load
7686 // should have been used.
7687 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7688 "Unexpected load emitted during handling of pass-by-value "
7689 "argument.");
7690 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7691 EVT ShiftAmountTy =
7692 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7693 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7694 SDValue ShiftedLoad =
7695 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7696 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7697 ShiftedLoad)
7698 : ShiftedLoad;
7699 }
7700
7701 const CCValAssign &ByValVA = ArgLocs[I++];
7702 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7703 continue;
7704 }
7705
7706 CCValAssign &VA = ArgLocs[I++];
7707 const MVT LocVT = VA.getLocVT();
7708 const MVT ValVT = VA.getValVT();
7709
7710 switch (VA.getLocInfo()) {
7711 default:
7712 report_fatal_error("Unexpected argument extension type.");
7713 case CCValAssign::Full:
7714 break;
7715 case CCValAssign::ZExt:
7716 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7717 break;
7718 case CCValAssign::SExt:
7719 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7720 break;
7721 }
7722
7723 if (VA.isRegLoc() && !VA.needsCustom()) {
7724 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7725 continue;
7726 }
7727
7728 // Vector arguments passed to VarArg functions need custom handling when
7729 // they are passed (at least partially) in GPRs.
7730 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7731 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7732 // Store value to its stack slot.
7733 SDValue PtrOff =
7734 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7735 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7736 SDValue Store =
7737 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7738 MemOpChains.push_back(Store);
7739 const unsigned OriginalValNo = VA.getValNo();
7740 // Then load the GPRs from the stack
7741 unsigned LoadOffset = 0;
7742 auto HandleCustomVecRegLoc = [&]() {
7743 assert(I != E && "Unexpected end of CCvalAssigns.");
7744 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7745 "Expected custom RegLoc.");
7746 CCValAssign RegVA = ArgLocs[I++];
7747 assert(RegVA.getValNo() == OriginalValNo &&
7748 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7749 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7750 DAG.getConstant(LoadOffset, dl, PtrVT));
7751 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7752 MemOpChains.push_back(Load.getValue(1));
7753 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7754 LoadOffset += PtrByteSize;
7755 };
7756
7757 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7758 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7759 // R10.
7760 HandleCustomVecRegLoc();
7761 HandleCustomVecRegLoc();
7762
7763 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7764 ArgLocs[I].getValNo() == OriginalValNo) {
7765 assert(!IsPPC64 &&
7766 "Only 2 custom RegLocs expected for 64-bit codegen.");
7767 HandleCustomVecRegLoc();
7768 HandleCustomVecRegLoc();
7769 }
7770
7771 continue;
7772 }
7773
7774 if (VA.isMemLoc()) {
7775 SDValue PtrOff =
7776 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7777 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7778 MemOpChains.push_back(
7779 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7780
7781 continue;
7782 }
7783
7784 if (!ValVT.isFloatingPoint())
7785 report_fatal_error(
7786 "Unexpected register handling for calling convention.");
7787
7788 // Custom handling is used for GPR initializations for vararg float
7789 // arguments.
7790 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7791 LocVT.isInteger() &&
7792 "Custom register handling only expected for VarArg.");
7793
7794 SDValue ArgAsInt =
7795 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7796
7797 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7798 // f32 in 32-bit GPR
7799 // f64 in 64-bit GPR
7800 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7801 else if (Arg.getValueType().getFixedSizeInBits() <
7802 LocVT.getFixedSizeInBits())
7803 // f32 in 64-bit GPR.
7804 RegsToPass.push_back(std::make_pair(
7805 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7806 else {
7807 // f64 in two 32-bit GPRs
7808 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7809 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7810 "Unexpected custom register for argument!");
7811 CCValAssign &GPR1 = VA;
7812 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7813 DAG.getConstant(32, dl, MVT::i8));
7814 RegsToPass.push_back(std::make_pair(
7815 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7816
7817 if (I != E) {
7818 // If only 1 GPR was available, there will only be one custom GPR and
7819 // the argument will also pass in memory.
7820 CCValAssign &PeekArg = ArgLocs[I];
7821 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7822 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7823 CCValAssign &GPR2 = ArgLocs[I++];
7824 RegsToPass.push_back(std::make_pair(
7825 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7826 }
7827 }
7828 }
7829 }
7830
7831 if (!MemOpChains.empty())
7832 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7833
7834 // For indirect calls, we need to save the TOC base to the stack for
7835 // restoration after the call.
7836 if (CFlags.IsIndirect) {
7837 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7838 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7839 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7840 const MVT PtrVT = Subtarget.getScalarIntVT();
7841 const unsigned TOCSaveOffset =
7842 Subtarget.getFrameLowering()->getTOCSaveOffset();
7843
7844 setUsesTOCBasePtr(DAG);
7845 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7846 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7847 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7848 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7849 Chain = DAG.getStore(
7850 Val.getValue(1), dl, Val, AddPtr,
7851 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7852 }
7853
7854 // Build a sequence of copy-to-reg nodes chained together with token chain
7855 // and flag operands which copy the outgoing args into the appropriate regs.
7856 SDValue InGlue;
7857 for (auto Reg : RegsToPass) {
7858 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7859 InGlue = Chain.getValue(1);
7860 }
7861
7862 const int SPDiff = 0;
7863 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7864 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7865}
7866
7867bool
7868PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7869 MachineFunction &MF, bool isVarArg,
7870 const SmallVectorImpl<ISD::OutputArg> &Outs,
7871 LLVMContext &Context,
7872 const Type *RetTy) const {
7873 SmallVector<CCValAssign, 16> RVLocs;
7874 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7875 return CCInfo.CheckReturn(
7876 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7877 ? RetCC_PPC_Cold
7878 : RetCC_PPC);
7879}
7880
7881SDValue
7882PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7883 bool isVarArg,
7884 const SmallVectorImpl<ISD::OutputArg> &Outs,
7885 const SmallVectorImpl<SDValue> &OutVals,
7886 const SDLoc &dl, SelectionDAG &DAG) const {
7887 SmallVector<CCValAssign, 16> RVLocs;
7888 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7889 *DAG.getContext());
7890 CCInfo.AnalyzeReturn(Outs,
7891 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7892 ? RetCC_PPC_Cold
7893 : RetCC_PPC);
7894
7895 SDValue Glue;
7896 SmallVector<SDValue, 4> RetOps(1, Chain);
7897
7898 // Copy the result values into the output registers.
7899 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7900 CCValAssign &VA = RVLocs[i];
7901 assert(VA.isRegLoc() && "Can only return in registers!");
7902
7903 SDValue Arg = OutVals[RealResIdx];
7904
7905 switch (VA.getLocInfo()) {
7906 default: llvm_unreachable("Unknown loc info!");
7907 case CCValAssign::Full: break;
7908 case CCValAssign::AExt:
7909 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7910 break;
7911 case CCValAssign::ZExt:
7912 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7913 break;
7914 case CCValAssign::SExt:
7915 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7916 break;
7917 }
7918 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7919 bool isLittleEndian = Subtarget.isLittleEndian();
7920 // Legalize ret f64 -> ret 2 x i32.
7921 SDValue SVal =
7922 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7923 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7924 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7925 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7926 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7927 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7928 Glue = Chain.getValue(1);
7929 VA = RVLocs[++i]; // skip ahead to next loc
7930 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7931 } else
7932 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7933 Glue = Chain.getValue(1);
7934 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7935 }
7936
7937 RetOps[0] = Chain; // Update chain.
7938
7939 // Add the glue if we have it.
7940 if (Glue.getNode())
7941 RetOps.push_back(Glue);
7942
7943 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7944}
7945
7946SDValue
7947PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7948 SelectionDAG &DAG) const {
7949 SDLoc dl(Op);
7950
7951 // Get the correct type for integers.
7952 EVT IntVT = Op.getValueType();
7953
7954 // Get the inputs.
7955 SDValue Chain = Op.getOperand(0);
7956 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7957 // Build a DYNAREAOFFSET node.
7958 SDValue Ops[2] = {Chain, FPSIdx};
7959 SDVTList VTs = DAG.getVTList(IntVT);
7960 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7961}
7962
7963SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7964 SelectionDAG &DAG) const {
7965 // When we pop the dynamic allocation we need to restore the SP link.
7966 SDLoc dl(Op);
7967
7968 // Get the correct type for pointers.
7969 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7970
7971 // Construct the stack pointer operand.
7972 bool isPPC64 = Subtarget.isPPC64();
7973 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7974 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7975
7976 // Get the operands for the STACKRESTORE.
7977 SDValue Chain = Op.getOperand(0);
7978 SDValue SaveSP = Op.getOperand(1);
7979
7980 // Load the old link SP.
7981 SDValue LoadLinkSP =
7982 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7983
7984 // Restore the stack pointer.
7985 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7986
7987 // Store the old link SP.
7988 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7989}
7990
7991SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7992 MachineFunction &MF = DAG.getMachineFunction();
7993 bool isPPC64 = Subtarget.isPPC64();
7994 EVT PtrVT = getPointerTy(MF.getDataLayout());
7995
7996 // Get the current return address save index; it is created below on
7997 // first use if it has not been allocated yet.
7998 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7999 int RASI = FI->getReturnAddrSaveIndex();
8000
8002 // If the return address save index hasn't been defined yet.
8002 if (!RASI) {
8003 // Find out what the fixed offset of the return address save area is.
8004 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
8005 // Allocate the frame index for the return address save area.
8006 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
8007 // Save the result.
8008 FI->setReturnAddrSaveIndex(RASI);
8009 }
8010 return DAG.getFrameIndex(RASI, PtrVT);
8011}
8012
8013SDValue
8014PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
8015 MachineFunction &MF = DAG.getMachineFunction();
8016 bool isPPC64 = Subtarget.isPPC64();
8017 EVT PtrVT = getPointerTy(MF.getDataLayout());
8018
8019 // Get current frame pointer save index. The users of this index will be
8020 // primarily DYNALLOC instructions.
8021 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8022 int FPSI = FI->getFramePointerSaveIndex();
8023
8024 // If the frame pointer save index hasn't been defined yet.
8025 if (!FPSI) {
8027 // Find out what the fixed offset of the frame pointer save area is.
8027 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8028 // Allocate the frame index for frame pointer save area.
8029 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
8030 // Save the result.
8031 FI->setFramePointerSaveIndex(FPSI);
8032 }
8033 return DAG.getFrameIndex(FPSI, PtrVT);
8034}
8035
8036SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8037 SelectionDAG &DAG) const {
8038 MachineFunction &MF = DAG.getMachineFunction();
8039 // Get the inputs.
8040 SDValue Chain = Op.getOperand(0);
8041 SDValue Size = Op.getOperand(1);
8042 SDLoc dl(Op);
8043
8044 // Get the correct type for pointers.
8045 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8046 // Negate the size.
8047 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8048 DAG.getConstant(0, dl, PtrVT), Size);
8049 // Construct a node for the frame pointer save index.
8050 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8051 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8052 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8053 if (hasInlineStackProbe(MF))
8054 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8055 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8056}
8057
8058SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8059 SelectionDAG &DAG) const {
8060 MachineFunction &MF = DAG.getMachineFunction();
8061
8062 bool isPPC64 = Subtarget.isPPC64();
8063 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8064
8065 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8066 return DAG.getFrameIndex(FI, PtrVT);
8067}
8068
8069SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8070 SelectionDAG &DAG) const {
8071 SDLoc DL(Op);
8072 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8073 DAG.getVTList(MVT::i32, MVT::Other),
8074 Op.getOperand(0), Op.getOperand(1));
8075}
8076
8077SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8078 SelectionDAG &DAG) const {
8079 SDLoc DL(Op);
8080 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8081 Op.getOperand(0), Op.getOperand(1));
8082}
8083
8084SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8085 if (Op.getValueType().isVector())
8086 return LowerVectorLoad(Op, DAG);
8087
8088 assert(Op.getValueType() == MVT::i1 &&
8089 "Custom lowering only for i1 loads");
8090
8091 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8092
8093 SDLoc dl(Op);
8094 LoadSDNode *LD = cast<LoadSDNode>(Op);
8095
8096 SDValue Chain = LD->getChain();
8097 SDValue BasePtr = LD->getBasePtr();
8098 MachineMemOperand *MMO = LD->getMemOperand();
8099
8100 SDValue NewLD =
8101 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8102 BasePtr, MVT::i8, MMO);
8103 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8104
8105 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8106 return DAG.getMergeValues(Ops, dl);
8107}
8108
8109SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8110 if (Op.getOperand(1).getValueType().isVector())
8111 return LowerVectorStore(Op, DAG);
8112
8113 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8114 "Custom lowering only for i1 stores");
8115
8116 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8117
8118 SDLoc dl(Op);
8119 StoreSDNode *ST = cast<StoreSDNode>(Op);
8120
8121 SDValue Chain = ST->getChain();
8122 SDValue BasePtr = ST->getBasePtr();
8123 SDValue Value = ST->getValue();
8124 MachineMemOperand *MMO = ST->getMemOperand();
8125
8126  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32,
8127 Value);
8128 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8129}
8130
8131// FIXME: Remove this once the ANDI glue bug is fixed:
8132SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8133 assert(Op.getValueType() == MVT::i1 &&
8134 "Custom lowering only for i1 results");
8135
8136 SDLoc DL(Op);
8137 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8138}
8139
8140SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8141 SelectionDAG &DAG) const {
8142
8143 // Implements a vector truncate that fits in a vector register as a shuffle.
8144 // We want to legalize vector truncates down to where the source fits in
8145 // a vector register (and target is therefore smaller than vector register
8146 // size). At that point legalization will try to custom lower the sub-legal
8147 // result and get here - where we can contain the truncate as a single target
8148 // operation.
8149
8150 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8151 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8152 //
8153 // We will implement it for big-endian ordering as this (where x denotes
8154 // undefined):
8155 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8156 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8157 //
8158 // The same operation in little-endian ordering will be:
8159 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8160 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8161
8162 EVT TrgVT = Op.getValueType();
8163 assert(TrgVT.isVector() && "Vector type expected.");
8164 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8165 EVT EltVT = TrgVT.getVectorElementType();
8166 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8167 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8168 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8169 return SDValue();
8170
8171 SDValue N1 = Op.getOperand(0);
8172 EVT SrcVT = N1.getValueType();
8173 unsigned SrcSize = SrcVT.getSizeInBits();
8174 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8175 !llvm::has_single_bit<uint32_t>(
8176          SrcVT.getVectorElementType().getSizeInBits()))
8177 return SDValue();
8178 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8179 return SDValue();
8180
8181 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8182 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8183
8184 SDLoc DL(Op);
8185 SDValue Op1, Op2;
8186 if (SrcSize == 256) {
8187 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8188 EVT SplitVT =
8189        SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8190 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8191 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8192 DAG.getConstant(0, DL, VecIdxTy));
8193 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8194 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8195 }
8196 else {
8197 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8198 Op2 = DAG.getUNDEF(WideVT);
8199 }
8200
8201 // First list the elements we want to keep.
8202 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8203 SmallVector<int, 16> ShuffV;
8204 if (Subtarget.isLittleEndian())
8205 for (unsigned i = 0; i < TrgNumElts; ++i)
8206 ShuffV.push_back(i * SizeMult);
8207 else
8208 for (unsigned i = 1; i <= TrgNumElts; ++i)
8209 ShuffV.push_back(i * SizeMult - 1);
8210
8211 // Populate the remaining elements with undefs.
8212 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8213 // ShuffV.push_back(i + WideNumElts);
8214    ShuffV.push_back(-1);
8215
8216 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8217 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8218 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8219}
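
An illustration added for this writeup, not part of PPCISelLowering.cpp: the mask construction above can be reproduced with plain integers. The sketch assumes a v8i16 to v8i8 truncate of a 128-bit source (TrgNumElts = 8, WideNumElts = 16, SizeMult = 2); buildTruncShuffleMask is a hypothetical helper, and -1 stands for an undefined shuffle element.

#include <cstdio>
#include <vector>

// Hypothetical helper mirroring the mask loops in LowerTRUNCATEVector.
std::vector<int> buildTruncShuffleMask(unsigned TrgNumElts, unsigned WideNumElts,
                                       unsigned SizeMult, bool IsLittleEndian) {
  std::vector<int> ShuffV;
  if (IsLittleEndian)
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);     // keep the low subword of each element
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1); // on BE the low subword is the last one
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    ShuffV.push_back(-1);                 // remaining lanes are undefined
  return ShuffV;
}

int main() {
  const bool Endians[2] = {true, false};
  for (bool LE : Endians) {
    std::printf("%s mask:", LE ? "LE" : "BE");
    for (int Idx : buildTruncShuffleMask(8, 16, 2, LE))
      std::printf(" %d", Idx);
    std::printf("\n");
  }
  return 0;
}
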
8220
8221/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
8222/// when possible.
8223SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8224 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8225 EVT ResVT = Op.getValueType();
8226 EVT CmpVT = Op.getOperand(0).getValueType();
8227 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8228 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8229 SDLoc dl(Op);
8230
8231  // Without power9-vector, there is no native instruction for f128 comparison.
8232  // The following transformation to a setcc libcall is needed:
8233  // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, lhs, rhs), 0, tv, fv, NE
8234 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8235 SDValue Z = DAG.getSetCC(
8236 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8237 LHS, RHS, CC);
8238 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8239 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8240 }
8241
8242 // Not FP, or using SPE? Not a fsel.
8243 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8244 Subtarget.hasSPE())
8245 return Op;
8246
8247 SDNodeFlags Flags = Op.getNode()->getFlags();
8248
8249 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8250 // presence of infinities.
8251 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8252 switch (CC) {
8253 default:
8254 break;
8255 case ISD::SETOGT:
8256 case ISD::SETGT:
8257 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8258 case ISD::SETOLT:
8259 case ISD::SETLT:
8260 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8261 }
8262 }
8263
8264 // We might be able to do better than this under some circumstances, but in
8265 // general, fsel-based lowering of select is a finite-math-only optimization.
8266 // For more information, see section F.3 of the 2.06 ISA specification.
8267 // With ISA 3.0
8268 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8269 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8270 ResVT == MVT::f128)
8271 return Op;
8272
8273 // If the RHS of the comparison is a 0.0, we don't need to do the
8274 // subtraction at all.
8275 SDValue Sel1;
8276 if (isFloatingPointZero(RHS))
8277 switch (CC) {
8278 default: break; // SETUO etc aren't handled by fsel.
8279 case ISD::SETNE:
8280 std::swap(TV, FV);
8281 [[fallthrough]];
8282 case ISD::SETEQ:
8283 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8284 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8285 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8286 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8287 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8288 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8289 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8290 case ISD::SETULT:
8291 case ISD::SETLT:
8292 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8293 [[fallthrough]];
8294 case ISD::SETOGE:
8295 case ISD::SETGE:
8296 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8297 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8298 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8299 case ISD::SETUGT:
8300 case ISD::SETGT:
8301      std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
8302 [[fallthrough]];
8303 case ISD::SETOLE:
8304 case ISD::SETLE:
8305 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8306 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8307 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8308 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8309 }
8310
8311 SDValue Cmp;
8312 switch (CC) {
8313 default: break; // SETUO etc aren't handled by fsel.
8314 case ISD::SETNE:
8315 std::swap(TV, FV);
8316 [[fallthrough]];
8317 case ISD::SETEQ:
8318 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8319 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8320 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8321 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8322 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8323 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8324 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8325 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8326 case ISD::SETULT:
8327 case ISD::SETLT:
8328 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8329 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8330 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8331 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8332 case ISD::SETOGE:
8333 case ISD::SETGE:
8334 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8335 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8336 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8337 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8338 case ISD::SETUGT:
8339 case ISD::SETGT:
8340 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8341 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8342 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8343 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8344 case ISD::SETOLE:
8345 case ISD::SETLE:
8346 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8347 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8348 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8349 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8350 }
8351 return Op;
8352}
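
A scalar model, added here for intuition and not taken from LLVM: fsel evaluates Cmp >= 0.0 ? TV : FV, and each condition handled above is reduced to that single test. The names fsel and selectLT below are made up; the SETLT case maps to fsel(LHS - RHS, FV, TV), exactly as in the switch above.

#include <cassert>

// Scalar model of the PPC fsel semantics relied on above (illustration only).
static double fsel(double Cmp, double TV, double FV) { return Cmp >= 0.0 ? TV : FV; }

// select_cc lhs, rhs, tv, fv, SETLT, lowered the same way as in the code above:
// the comparison becomes lhs - rhs and the true/false values are swapped.
static double selectLT(double LHS, double RHS, double TV, double FV) {
  return fsel(LHS - RHS, FV, TV);
}

int main() {
  assert(selectLT(1.0, 2.0, 10.0, 20.0) == 10.0); // 1 < 2, take TV
  assert(selectLT(3.0, 2.0, 10.0, 20.0) == 20.0); // 3 >= 2, take FV
  return 0;
}
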
8353
8354static unsigned getPPCStrictOpcode(unsigned Opc) {
8355 switch (Opc) {
8356 default:
8357 llvm_unreachable("No strict version of this opcode!");
8358 case PPCISD::FCTIDZ:
8359 return PPCISD::STRICT_FCTIDZ;
8360 case PPCISD::FCTIWZ:
8361 return PPCISD::STRICT_FCTIWZ;
8362 case PPCISD::FCTIDUZ:
8363    return PPCISD::STRICT_FCTIDUZ;
8364 case PPCISD::FCTIWUZ:
8365    return PPCISD::STRICT_FCTIWUZ;
8366 case PPCISD::FCFID:
8367 return PPCISD::STRICT_FCFID;
8368 case PPCISD::FCFIDU:
8369 return PPCISD::STRICT_FCFIDU;
8370 case PPCISD::FCFIDS:
8371 return PPCISD::STRICT_FCFIDS;
8372 case PPCISD::FCFIDUS:
8373    return PPCISD::STRICT_FCFIDUS;
8374 }
8375}
8376
8377static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8378 const PPCSubtarget &Subtarget) {
8379 SDLoc dl(Op);
8380 bool IsStrict = Op->isStrictFPOpcode();
8381 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8382 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8383
8384 // TODO: Any other flags to propagate?
8385 SDNodeFlags Flags;
8386 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8387
8388 // For strict nodes, source is the second operand.
8389 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8390 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8391 MVT DestTy = Op.getSimpleValueType();
8392 assert(Src.getValueType().isFloatingPoint() &&
8393 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8394 DestTy == MVT::i64) &&
8395 "Invalid FP_TO_INT types");
8396 if (Src.getValueType() == MVT::f32) {
8397 if (IsStrict) {
8398 Src =
8399          DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8400 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8401 Chain = Src.getValue(1);
8402 } else
8403 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8404 }
8405 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8406 DestTy = Subtarget.getScalarIntVT();
8407 unsigned Opc = ISD::DELETED_NODE;
8408 switch (DestTy.SimpleTy) {
8409 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8410 case MVT::i32:
8411 Opc = IsSigned ? PPCISD::FCTIWZ
8412 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8413 break;
8414 case MVT::i64:
8415 assert((IsSigned || Subtarget.hasFPCVT()) &&
8416 "i64 FP_TO_UINT is supported only with FPCVT");
8417 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8418 }
8419 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8420 SDValue Conv;
8421 if (IsStrict) {
8422 Opc = getPPCStrictOpcode(Opc);
8423 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8424 Flags);
8425 } else {
8426 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8427 }
8428 return Conv;
8429}
8430
8431void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8432 SelectionDAG &DAG,
8433 const SDLoc &dl) const {
8434 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8435 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8436 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8437 bool IsStrict = Op->isStrictFPOpcode();
8438
8439 // Convert the FP value to an int value through memory.
8440 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8441 (IsSigned || Subtarget.hasFPCVT());
8442 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8443 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8444 MachinePointerInfo MPI =
8445      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8446
8447 // Emit a store to the stack slot.
8448 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8449 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8450 if (i32Stack) {
8451    MachineFunction &MF = DAG.getMachineFunction();
8452 Alignment = Align(4);
8453 MachineMemOperand *MMO =
8454 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8455 SDValue Ops[] = { Chain, Tmp, FIPtr };
8456 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8457 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8458 } else
8459 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8460
8461 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8462 // add in a bias on big endian.
8463 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8464 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8465 DAG.getConstant(4, dl, FIPtr.getValueType()));
8466 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8467 }
8468
8469 RLI.Chain = Chain;
8470 RLI.Ptr = FIPtr;
8471 RLI.MPI = MPI;
8472 RLI.Alignment = Alignment;
8473}
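
A side note added by the editor, not from the source: the 32-bit result of the conversion occupies the low word of the 8-byte slot, so the reload described above needs a 4-byte offset on big-endian targets and none on little-endian ones. A host-independent sketch of that bias (load32 is a made-up helper):

#include <cstdint>
#include <cstdio>

// An 8-byte slot holding the 64-bit value 42. The 32-bit payload occupies
// bytes 4..7 in a big-endian image and bytes 0..3 in a little-endian one,
// which is exactly the bias applied above.
static uint32_t load32(const unsigned char *P, bool BigEndian) {
  if (BigEndian)
    return (uint32_t(P[0]) << 24) | (uint32_t(P[1]) << 16) |
           (uint32_t(P[2]) << 8) | uint32_t(P[3]);
  return (uint32_t(P[3]) << 24) | (uint32_t(P[2]) << 16) |
         (uint32_t(P[1]) << 8) | uint32_t(P[0]);
}

int main() {
  unsigned char BESlot[8] = {0, 0, 0, 0, 0, 0, 0, 42}; // big-endian image of 42
  unsigned char LESlot[8] = {42, 0, 0, 0, 0, 0, 0, 0}; // little-endian image of 42
  std::printf("BE load at offset 4: %u\n", load32(BESlot + 4, /*BigEndian=*/true));
  std::printf("LE load at offset 0: %u\n", load32(LESlot + 0, /*BigEndian=*/false));
  return 0;
}
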
8474
8475/// Custom lowers floating point to integer conversions to use
8476/// the direct move instructions available in ISA 2.07 to avoid the
8477/// need for load/store combinations.
8478SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8479 SelectionDAG &DAG,
8480 const SDLoc &dl) const {
8481 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8482 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8483 if (Op->isStrictFPOpcode())
8484 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8485 else
8486 return Mov;
8487}
8488
8489SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8490 const SDLoc &dl) const {
8491 bool IsStrict = Op->isStrictFPOpcode();
8492 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8493 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8494 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8495 EVT SrcVT = Src.getValueType();
8496 EVT DstVT = Op.getValueType();
8497
8498 // FP to INT conversions are legal for f128.
8499 if (SrcVT == MVT::f128)
8500 return Subtarget.hasP9Vector() ? Op : SDValue();
8501
8502 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8503 // PPC (the libcall is not available).
8504 if (SrcVT == MVT::ppcf128) {
8505 if (DstVT == MVT::i32) {
8506 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8507 // set other fast-math flags to FP operations in both strict and
8508 // non-strict cases. (FP_TO_SINT, FSUB)
8509      SDNodeFlags Flags;
8510 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8511
8512 if (IsSigned) {
8513 SDValue Lo, Hi;
8514 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8515
8516 // Add the two halves of the long double in round-to-zero mode, and use
8517 // a smaller FP_TO_SINT.
8518 if (IsStrict) {
8519          SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8520 DAG.getVTList(MVT::f64, MVT::Other),
8521 {Op.getOperand(0), Lo, Hi}, Flags);
8522 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8523 DAG.getVTList(MVT::i32, MVT::Other),
8524 {Res.getValue(1), Res}, Flags);
8525 } else {
8526 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8527 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8528 }
8529 } else {
8530 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8531 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8532 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8533 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8534 if (IsStrict) {
8535 // Sel = Src < 0x80000000
8536 // FltOfs = select Sel, 0.0, 0x80000000
8537 // IntOfs = select Sel, 0, 0x80000000
8538 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8539 SDValue Chain = Op.getOperand(0);
8540 EVT SetCCVT =
8541 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8542 EVT DstSetCCVT =
8543 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8544 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8545 Chain, true);
8546 Chain = Sel.getValue(1);
8547
8548 SDValue FltOfs = DAG.getSelect(
8549 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8550 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8551
8552 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8553 DAG.getVTList(SrcVT, MVT::Other),
8554 {Chain, Src, FltOfs}, Flags);
8555 Chain = Val.getValue(1);
8556 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8557 DAG.getVTList(DstVT, MVT::Other),
8558 {Chain, Val}, Flags);
8559 Chain = SInt.getValue(1);
8560 SDValue IntOfs = DAG.getSelect(
8561 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8562 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8563 return DAG.getMergeValues({Result, Chain}, dl);
8564 } else {
8565 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8566 // FIXME: generated code sucks.
8567 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8568 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8569 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8570 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8571 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8572 }
8573 }
8574 }
8575
8576 return SDValue();
8577 }
8578
8579 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8580 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8581
8582 ReuseLoadInfo RLI;
8583 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8584
8585 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8586 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8587}
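
A scalar sketch of the unsigned ppcf128 path above, added for clarity and using a plain double instead of ppc_fp128: values below 2^31 convert directly, larger values are biased down by 2^31 before the signed conversion and the sign bit is restored with an xor. fpToUInt32 is a hypothetical name.

#include <cassert>
#include <cstdint>

// Sketch of the FP -> u32 lowering above, on double for illustration only.
static uint32_t fpToUInt32(double Src) {
  const double Cst = 2147483648.0;            // 2^31
  bool Sel = Src < Cst;                       // Sel = Src < 0x80000000
  double FltOfs = Sel ? 0.0 : Cst;            // FltOfs = select Sel, 0.0, 2^31
  uint32_t IntOfs = Sel ? 0u : 0x80000000u;   // IntOfs = select Sel, 0, 0x80000000
  int32_t SInt = static_cast<int32_t>(Src - FltOfs);
  return static_cast<uint32_t>(SInt) ^ IntOfs; // restore the top bit
}

int main() {
  assert(fpToUInt32(7.0) == 7u);
  assert(fpToUInt32(2147483648.0) == 2147483648u);
  assert(fpToUInt32(4294967040.0) == 4294967040u);
  return 0;
}
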
8588
8589// We're trying to insert a regular store, S, and then a load, L. If the
8590// incoming value, O, is a load, we might just be able to have our load use the
8591// address used by O. However, we don't know if anything else will store to
8592// that address before we can load from it. To prevent this situation, we need
8593// to insert our load, L, into the chain as a peer of O. To do this, we give L
8594// the same chain operand as O, we create a token factor from the chain results
8595// of O and L, and we replace all uses of O's chain result with that token
8596// factor (see spliceIntoChain below for this last part).
8597bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8598 ReuseLoadInfo &RLI,
8599 SelectionDAG &DAG,
8600 ISD::LoadExtType ET) const {
8601 // Conservatively skip reusing for constrained FP nodes.
8602 if (Op->isStrictFPOpcode())
8603 return false;
8604
8605 SDLoc dl(Op);
8606 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8607 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8608 if (ET == ISD::NON_EXTLOAD &&
8609 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8610 isOperationLegalOrCustom(Op.getOpcode(),
8611 Op.getOperand(0).getValueType())) {
8612
8613 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8614 return true;
8615 }
8616
8617 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8618 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8619 LD->isNonTemporal())
8620 return false;
8621 if (LD->getMemoryVT() != MemVT)
8622 return false;
8623
8624 // If the result of the load is an illegal type, then we can't build a
8625 // valid chain for reuse since the legalised loads and token factor node that
8626  // ties the legalised loads together uses a different output chain than the
8627 // illegal load.
8628 if (!isTypeLegal(LD->getValueType(0)))
8629 return false;
8630
8631 RLI.Ptr = LD->getBasePtr();
8632 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8633 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8634 "Non-pre-inc AM on PPC?");
8635 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8636 LD->getOffset());
8637 }
8638
8639 RLI.Chain = LD->getChain();
8640 RLI.MPI = LD->getPointerInfo();
8641 RLI.IsDereferenceable = LD->isDereferenceable();
8642 RLI.IsInvariant = LD->isInvariant();
8643 RLI.Alignment = LD->getAlign();
8644 RLI.AAInfo = LD->getAAInfo();
8645 RLI.Ranges = LD->getRanges();
8646
8647 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8648 return true;
8649}
8650
8651// Given the head of the old chain, ResChain, insert a token factor containing
8652// it and NewResChain, and make users of ResChain now be users of that token
8653// factor.
8654// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8655void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8656 SDValue NewResChain,
8657 SelectionDAG &DAG) const {
8658 if (!ResChain)
8659 return;
8660
8661 SDLoc dl(NewResChain);
8662
8663 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8664 NewResChain, DAG.getUNDEF(MVT::Other));
8665 assert(TF.getNode() != NewResChain.getNode() &&
8666 "A new TF really is required here");
8667
8668 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8669 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8670}
8671
8672/// Analyze the profitability of a direct move.
8673/// Prefer a float load over an int load plus a direct move
8674/// when the loaded integer value has no integer uses.
8675bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8676 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8677 if (Origin->getOpcode() != ISD::LOAD)
8678 return true;
8679
8680 // If there is no LXSIBZX/LXSIHZX, like Power8,
8681 // prefer direct move if the memory size is 1 or 2 bytes.
8682 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8683 if (!Subtarget.hasP9Vector() &&
8684 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8685 return true;
8686
8687 for (SDUse &Use : Origin->uses()) {
8688
8689 // Only look at the users of the loaded value.
8690 if (Use.getResNo() != 0)
8691 continue;
8692
8693 SDNode *User = Use.getUser();
8694 if (User->getOpcode() != ISD::SINT_TO_FP &&
8695 User->getOpcode() != ISD::UINT_TO_FP &&
8696 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8697 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8698 return true;
8699 }
8700
8701 return false;
8702}
8703
8704static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8705 const PPCSubtarget &Subtarget,
8706 SDValue Chain = SDValue()) {
8707 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8708 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8709 SDLoc dl(Op);
8710
8711 // TODO: Any other flags to propagate?
8712 SDNodeFlags Flags;
8713 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8714
8715 // If we have FCFIDS, then use it when converting to single-precision.
8716 // Otherwise, convert to double-precision and then round.
8717 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8718 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8719 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8720 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8721 if (Op->isStrictFPOpcode()) {
8722 if (!Chain)
8723 Chain = Op.getOperand(0);
8724 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8725 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8726 } else
8727 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8728}
8729
8730/// Custom lowers integer to floating point conversions to use
8731/// the direct move instructions available in ISA 2.07 to avoid the
8732/// need for load/store combinations.
8733SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8734 SelectionDAG &DAG,
8735 const SDLoc &dl) const {
8736 assert((Op.getValueType() == MVT::f32 ||
8737 Op.getValueType() == MVT::f64) &&
8738 "Invalid floating point type as target of conversion");
8739 assert(Subtarget.hasFPCVT() &&
8740 "Int to FP conversions with direct moves require FPCVT");
8741 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8742 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8743 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8744 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8745 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8746 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8747 return convertIntToFP(Op, Mov, DAG, Subtarget);
8748}
8749
8750static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8751
8752 EVT VecVT = Vec.getValueType();
8753 assert(VecVT.isVector() && "Expected a vector type.");
8754 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8755
8756 EVT EltVT = VecVT.getVectorElementType();
8757 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8758 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8759
8760 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8761 SmallVector<SDValue, 16> Ops(NumConcat);
8762 Ops[0] = Vec;
8763 SDValue UndefVec = DAG.getUNDEF(VecVT);
8764 for (unsigned i = 1; i < NumConcat; ++i)
8765 Ops[i] = UndefVec;
8766
8767 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8768}
8769
8770SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8771 const SDLoc &dl) const {
8772 bool IsStrict = Op->isStrictFPOpcode();
8773 unsigned Opc = Op.getOpcode();
8774 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8775 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8776          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8777 "Unexpected conversion type");
8778 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8779 "Supports conversions to v2f64/v4f32 only.");
8780
8781 // TODO: Any other flags to propagate?
8782  SDNodeFlags Flags;
8783 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8784
8785 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8786 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8787
8788 SDValue Wide = widenVec(DAG, Src, dl);
8789 EVT WideVT = Wide.getValueType();
8790 unsigned WideNumElts = WideVT.getVectorNumElements();
8791 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8792
8793 SmallVector<int, 16> ShuffV;
8794 for (unsigned i = 0; i < WideNumElts; ++i)
8795 ShuffV.push_back(i + WideNumElts);
8796
8797 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8798 int SaveElts = FourEltRes ? 4 : 2;
8799 if (Subtarget.isLittleEndian())
8800 for (int i = 0; i < SaveElts; i++)
8801 ShuffV[i * Stride] = i;
8802 else
8803 for (int i = 1; i <= SaveElts; i++)
8804 ShuffV[i * Stride - 1] = i - 1;
8805
8806 SDValue ShuffleSrc2 =
8807 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8808 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8809
8810 SDValue Extend;
8811 if (SignedConv) {
8812 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8813 EVT ExtVT = Src.getValueType();
8814 if (Subtarget.hasP9Altivec())
8815 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8816 IntermediateVT.getVectorNumElements());
8817
8818 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8819 DAG.getValueType(ExtVT));
8820 } else
8821 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8822
8823 if (IsStrict)
8824 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8825 {Op.getOperand(0), Extend}, Flags);
8826
8827 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8828}
8829
8830SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8831 SelectionDAG &DAG) const {
8832 SDLoc dl(Op);
8833 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8834 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8835 bool IsStrict = Op->isStrictFPOpcode();
8836 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8837 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8838
8839 // TODO: Any other flags to propagate?
8840  SDNodeFlags Flags;
8841 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8842
8843 EVT InVT = Src.getValueType();
8844 EVT OutVT = Op.getValueType();
8845 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8846 isOperationCustom(Op.getOpcode(), InVT))
8847 return LowerINT_TO_FPVector(Op, DAG, dl);
8848
8849 // Conversions to f128 are legal.
8850 if (Op.getValueType() == MVT::f128)
8851 return Subtarget.hasP9Vector() ? Op : SDValue();
8852
8853 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8854 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8855 return SDValue();
8856
8857 if (Src.getValueType() == MVT::i1) {
8858 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8859 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8860 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8861 if (IsStrict)
8862 return DAG.getMergeValues({Sel, Chain}, dl);
8863 else
8864 return Sel;
8865 }
8866
8867 // If we have direct moves, we can do all the conversion, skip the store/load
8868 // however, without FPCVT we can't do most conversions.
8869 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8870 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8871 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8872
8873 assert((IsSigned || Subtarget.hasFPCVT()) &&
8874 "UINT_TO_FP is supported only with FPCVT");
8875
8876 if (Src.getValueType() == MVT::i64) {
8877 SDValue SINT = Src;
8878 // When converting to single-precision, we actually need to convert
8879 // to double-precision first and then round to single-precision.
8880 // To avoid double-rounding effects during that operation, we have
8881 // to prepare the input operand. Bits that might be truncated when
8882 // converting to double-precision are replaced by a bit that won't
8883 // be lost at this stage, but is below the single-precision rounding
8884 // position.
8885 //
8886 // However, if -enable-unsafe-fp-math is in effect, accept double
8887 // rounding to avoid the extra overhead.
8888 if (Op.getValueType() == MVT::f32 &&
8889 !Subtarget.hasFPCVT() &&
8890        !DAG.getTarget().Options.UnsafeFPMath) {
8891
8892 // Twiddle input to make sure the low 11 bits are zero. (If this
8893 // is the case, we are guaranteed the value will fit into the 53 bit
8894 // mantissa of an IEEE double-precision value without rounding.)
8895 // If any of those low 11 bits were not zero originally, make sure
8896 // bit 12 (value 2048) is set instead, so that the final rounding
8897 // to single-precision gets the correct result.
8898 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8899 SINT, DAG.getConstant(2047, dl, MVT::i64));
8900 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8901 Round, DAG.getConstant(2047, dl, MVT::i64));
8902 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8903 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8904 Round, DAG.getConstant(-2048, dl, MVT::i64));
8905
8906 // However, we cannot use that value unconditionally: if the magnitude
8907 // of the input value is small, the bit-twiddling we did above might
8908 // end up visibly changing the output. Fortunately, in that case, we
8909 // don't need to twiddle bits since the original input will convert
8910 // exactly to double-precision floating-point already. Therefore,
8911 // construct a conditional to use the original value if the top 11
8912 // bits are all sign-bit copies, and use the rounded value computed
8913 // above otherwise.
8914 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8915 SINT, DAG.getConstant(53, dl, MVT::i32));
8916 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8917 Cond, DAG.getConstant(1, dl, MVT::i64));
8918 Cond = DAG.getSetCC(
8919 dl,
8920 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8921 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8922
8923 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8924 }
8925
8926 ReuseLoadInfo RLI;
8927 SDValue Bits;
8928
8929    MachineFunction &MF = DAG.getMachineFunction();
8930 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8931 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8932 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8933 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8934 } else if (Subtarget.hasLFIWAX() &&
8935 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8936 MachineMemOperand *MMO =
8937          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8938 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8939 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8940      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8941 DAG.getVTList(MVT::f64, MVT::Other),
8942 Ops, MVT::i32, MMO);
8943 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8944 } else if (Subtarget.hasFPCVT() &&
8945 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8946 MachineMemOperand *MMO =
8947          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8948 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8949 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8950      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8951 DAG.getVTList(MVT::f64, MVT::Other),
8952 Ops, MVT::i32, MMO);
8953 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8954 } else if (((Subtarget.hasLFIWAX() &&
8955 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8956 (Subtarget.hasFPCVT() &&
8957 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8958 SINT.getOperand(0).getValueType() == MVT::i32) {
8959 MachineFrameInfo &MFI = MF.getFrameInfo();
8960 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8961
8962 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8963 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8964
8965 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8966                                   MachinePointerInfo::getFixedStack(
8967 DAG.getMachineFunction(), FrameIdx));
8968 Chain = Store;
8969
8970 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8971 "Expected an i32 store");
8972
8973 RLI.Ptr = FIdx;
8974 RLI.Chain = Chain;
8975 RLI.MPI =
8976        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8977 RLI.Alignment = Align(4);
8978
8979 MachineMemOperand *MMO =
8980          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8981 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8982 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8983      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::SIGN_EXTEND ?
8984                                         PPCISD::LFIWAX : PPCISD::LFIWZX,
8985 dl, DAG.getVTList(MVT::f64, MVT::Other),
8986 Ops, MVT::i32, MMO);
8987 Chain = Bits.getValue(1);
8988 } else
8989 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8990
8991 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8992 if (IsStrict)
8993 Chain = FP.getValue(1);
8994
8995 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8996 if (IsStrict)
8997 FP = DAG.getNode(
8998 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8999 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
9000 Flags);
9001 else
9002 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9003 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9004 }
9005 return FP;
9006 }
9007
9008 assert(Src.getValueType() == MVT::i32 &&
9009 "Unhandled INT_TO_FP type in custom expander!");
9010 // Since we only generate this in 64-bit mode, we can take advantage of
9011 // 64-bit registers. In particular, sign extend the input value into the
9012 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
9013 // then lfd it and fcfid it.
9014  MachineFunction &MF = DAG.getMachineFunction();
9015 MachineFrameInfo &MFI = MF.getFrameInfo();
9016 EVT PtrVT = getPointerTy(MF.getDataLayout());
9017
9018 SDValue Ld;
9019 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
9020 ReuseLoadInfo RLI;
9021 bool ReusingLoad;
9022 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
9023 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
9024 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9025
9026 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
9027                                   MachinePointerInfo::getFixedStack(
9028 DAG.getMachineFunction(), FrameIdx));
9029 Chain = Store;
9030
9031 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9032 "Expected an i32 store");
9033
9034 RLI.Ptr = FIdx;
9035 RLI.Chain = Chain;
9036 RLI.MPI =
9037        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9038 RLI.Alignment = Align(4);
9039 }
9040
9041 MachineMemOperand *MMO =
9042        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9043 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9044 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9045 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9046 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9047 MVT::i32, MMO);
9048 Chain = Ld.getValue(1);
9049 if (ReusingLoad)
9050 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
9051 } else {
9052 assert(Subtarget.isPPC64() &&
9053 "i32->FP without LFIWAX supported only on PPC64");
9054
9055 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9056 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9057
9058 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9059
9060 // STD the extended value into the stack slot.
9061 SDValue Store = DAG.getStore(
9062 Chain, dl, Ext64, FIdx,
9063        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9064 Chain = Store;
9065
9066 // Load the value as a double.
9067 Ld = DAG.getLoad(
9068 MVT::f64, dl, Chain, FIdx,
9069        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9070 Chain = Ld.getValue(1);
9071 }
9072
9073 // FCFID it and return it.
9074 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9075 if (IsStrict)
9076 Chain = FP.getValue(1);
9077 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9078 if (IsStrict)
9079 FP = DAG.getNode(
9080 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9081 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
9082 else
9083 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9084 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9085 }
9086 return FP;
9087}
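
An added sketch, not LLVM code, of the bit-twiddling described in the i64-to-f32 path above: the low 11 bits are folded into a single sticky bit at position 11, so the later f64-to-f32 rounding cannot be skewed by an earlier rounding to double. stickyRound is a made-up name.

#include <cstdint>
#include <cstdio>

// Sketch of the Round value computed above for i64 -> f32 without FPCVT.
static int64_t stickyRound(int64_t SINT) {
  int64_t Round = SINT & 2047;   // keep the low 11 bits
  Round += 2047;                 // carries into bit 11 iff any of them were set
  Round |= SINT;
  Round &= ~2047;                // clear the low 11 bits again
  return Round;
}

int main() {
  // Low bits all zero: the value is unchanged.
  std::printf("%lld\n", (long long)stickyRound(1LL << 53));
  // Some low bit set: the low 11 bits are cleared and bit 11 is set as a sticky bit.
  std::printf("%#llx\n", (unsigned long long)stickyRound((1LL << 53) | 5));
  return 0;
}
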
9088
9089SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9090 SelectionDAG &DAG) const {
9091 SDLoc Dl(Op);
9092  MachineFunction &MF = DAG.getMachineFunction();
9093 EVT PtrVT = getPointerTy(MF.getDataLayout());
9094 SDValue Chain = Op.getOperand(0);
9095
9096 // If requested mode is constant, just use simpler mtfsb/mffscrni
9097 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9098 uint64_t Mode = CVal->getZExtValue();
9099 assert(Mode < 4 && "Unsupported rounding mode!");
9100 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9101 if (Subtarget.isISA3_0())
9102 return SDValue(
9103 DAG.getMachineNode(
9104 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9105 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9106 1);
9107 SDNode *SetHi = DAG.getMachineNode(
9108 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9109 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9110 SDNode *SetLo = DAG.getMachineNode(
9111 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9112 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9113 return SDValue(SetLo, 0);
9114 }
9115
9116 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
9117 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9118 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9119 DAG.getConstant(3, Dl, MVT::i32));
9120 SDValue DstFlag = DAG.getNode(
9121 ISD::XOR, Dl, MVT::i32, SrcFlag,
9122 DAG.getNode(ISD::AND, Dl, MVT::i32,
9123 DAG.getNOT(Dl,
9124 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9125 MVT::i32),
9126 One));
9127 // For Power9, there's faster mffscrn, and we don't need to read FPSCR
9128 SDValue MFFS;
9129 if (!Subtarget.isISA3_0()) {
9130 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9131 Chain = MFFS.getValue(1);
9132 }
9133 SDValue NewFPSCR;
9134 if (Subtarget.isPPC64()) {
9135 if (Subtarget.isISA3_0()) {
9136 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9137 } else {
9138 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9139 SDNode *InsertRN = DAG.getMachineNode(
9140 PPC::RLDIMI, Dl, MVT::i64,
9141 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9142 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9143 DAG.getTargetConstant(0, Dl, MVT::i32),
9144 DAG.getTargetConstant(62, Dl, MVT::i32)});
9145 NewFPSCR = SDValue(InsertRN, 0);
9146 }
9147 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9148 } else {
9149 // In 32-bit mode, store f64, load and update the lower half.
9150 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9151 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9152 SDValue Addr = Subtarget.isLittleEndian()
9153 ? StackSlot
9154 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9155 DAG.getConstant(4, Dl, PtrVT));
9156 if (Subtarget.isISA3_0()) {
9157 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9158 } else {
9159 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9160 SDValue Tmp =
9161 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9162 Chain = Tmp.getValue(1);
9163 Tmp = SDValue(DAG.getMachineNode(
9164 PPC::RLWIMI, Dl, MVT::i32,
9165 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9166 DAG.getTargetConstant(30, Dl, MVT::i32),
9167 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9168 0);
9169 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9170 }
9171 NewFPSCR =
9172 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9173 Chain = NewFPSCR.getValue(1);
9174 }
9175 if (Subtarget.isISA3_0())
9176 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9177 {NewFPSCR, Chain}),
9178 1);
9179 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9180 SDNode *MTFSF = DAG.getMachineNode(
9181 PPC::MTFSF, Dl, MVT::Other,
9182 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9183 return SDValue(MTFSF, 0);
9184}
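
Editor's note: the expression Mode ^ (~(Mode >> 1) & 1) used above swaps encodings 0 and 1 and leaves 2 and 3 unchanged, turning the llvm.set.rounding operand (0 = toward zero, 1 = nearest, 2 = +inf, 3 = -inf) into the FPSCR RN field (0 = nearest, 1 = toward zero, 2 = +inf, 3 = -inf). A short, purely illustrative table generator:

#include <cstdio>

// LLVM rounding-mode operand -> PPC FPSCR RN encoding (illustration only).
int main() {
  const char *LLVMNames[4] = {"toward zero", "nearest", "+inf", "-inf"};
  const char *PPCNames[4] = {"nearest", "toward zero", "+inf", "-inf"};
  for (unsigned Mode = 0; Mode < 4; ++Mode) {
    unsigned RN = Mode ^ (~(Mode >> 1) & 1u);
    std::printf("LLVM %u (%s) -> RN %u (%s)\n", Mode, LLVMNames[Mode], RN, PPCNames[RN]);
  }
  return 0;
}
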
9185
9186SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9187 SelectionDAG &DAG) const {
9188 SDLoc dl(Op);
9189 /*
9190  The rounding mode is in bits 30:31 of FPSCR, and has the following
9191 settings:
9192 00 Round to nearest
9193 01 Round to 0
9194 10 Round to +inf
9195 11 Round to -inf
9196
9197 GET_ROUNDING, on the other hand, expects the following:
9198 -1 Undefined
9199 0 Round to 0
9200 1 Round to nearest
9201 2 Round to +inf
9202 3 Round to -inf
9203
9204 To perform the conversion, we do:
9205 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9206 */
9207
9208  MachineFunction &MF = DAG.getMachineFunction();
9209 EVT VT = Op.getValueType();
9210 EVT PtrVT = getPointerTy(MF.getDataLayout());
9211
9212 // Save FP Control Word to register
9213 SDValue Chain = Op.getOperand(0);
9214 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9215 Chain = MFFS.getValue(1);
9216
9217 SDValue CWD;
9218 if (isTypeLegal(MVT::i64)) {
9219 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9220 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9221 } else {
9222 // Save FP register to stack slot
9223 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9224 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9225 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9226
9227 // Load FP Control Word from low 32 bits of stack slot.
9228    assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
9229 "Stack slot adjustment is valid only on big endian subtargets!");
9230 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9231 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9232 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9233 Chain = CWD.getValue(1);
9234 }
9235
9236 // Transform as necessary
9237 SDValue CWD1 =
9238 DAG.getNode(ISD::AND, dl, MVT::i32,
9239 CWD, DAG.getConstant(3, dl, MVT::i32));
9240 SDValue CWD2 =
9241 DAG.getNode(ISD::SRL, dl, MVT::i32,
9242 DAG.getNode(ISD::AND, dl, MVT::i32,
9243 DAG.getNode(ISD::XOR, dl, MVT::i32,
9244 CWD, DAG.getConstant(3, dl, MVT::i32)),
9245 DAG.getConstant(3, dl, MVT::i32)),
9246 DAG.getConstant(1, dl, MVT::i32));
9247
9248 SDValue RetVal =
9249 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9250
9251 RetVal =
9252      DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9253 dl, VT, RetVal);
9254
9255 return DAG.getMergeValues({RetVal, Chain}, dl);
9256}
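
The inverse mapping, from the FPSCR RN bits back to the GET_ROUNDING encoding listed in the comment above, can be checked with a few lines (illustrative only, not part of the file):

#include <cstdio>

// PPC FPSCR RN bits -> GET_ROUNDING result, per the formula in the comment above.
int main() {
  for (unsigned RN = 0; RN < 4; ++RN) {
    unsigned Ret = (RN & 3) ^ ((~RN & 3) >> 1);
    std::printf("RN %u -> GET_ROUNDING %u\n", RN, Ret);
  }
  return 0;
}
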
9257
9258SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9259 EVT VT = Op.getValueType();
9260 unsigned BitWidth = VT.getSizeInBits();
9261 SDLoc dl(Op);
9262 assert(Op.getNumOperands() == 3 &&
9263 VT == Op.getOperand(1).getValueType() &&
9264 "Unexpected SHL!");
9265
9266 // Expand into a bunch of logical ops. Note that these ops
9267 // depend on the PPC behavior for oversized shift amounts.
9268 SDValue Lo = Op.getOperand(0);
9269 SDValue Hi = Op.getOperand(1);
9270 SDValue Amt = Op.getOperand(2);
9271 EVT AmtVT = Amt.getValueType();
9272
9273 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9274 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9275 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9276 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9277 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9278 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9279 DAG.getConstant(-BitWidth, dl, AmtVT));
9280 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9281 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9282 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9283 SDValue OutOps[] = { OutLo, OutHi };
9284 return DAG.getMergeValues(OutOps, dl);
9285}
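
An added sketch of the SHL_PARTS expansion for 32-bit parts, not taken from LLVM. ppcShl and ppcSrl are hypothetical helpers modelling the PPC behaviour the comment relies on: the shift amount is taken modulo 64 and any amount in 32..63 produces zero, which is what lets the expansion avoid a select.

#include <cassert>
#include <cstdint>

// Helpers modelling PPCISD::SHL/SRL on 32-bit values with a 6-bit shift amount.
static uint32_t ppcShl(uint32_t V, unsigned Amt) {
  Amt &= 63;
  return Amt < 32 ? V << Amt : 0;
}
static uint32_t ppcSrl(uint32_t V, unsigned Amt) {
  Amt &= 63;
  return Amt < 32 ? V >> Amt : 0;
}

// 64-bit shift-left of {Hi,Lo} by Amt (0..63), following the node sequence above.
static void shlParts(uint32_t Lo, uint32_t Hi, unsigned Amt, uint32_t &OutLo,
                     uint32_t &OutHi) {
  uint32_t Tmp4 = ppcShl(Hi, Amt) | ppcSrl(Lo, 32 - Amt);
  OutHi = Tmp4 | ppcShl(Lo, Amt - 32); // Amt - 32 wraps modulo 64 for Amt < 32
  OutLo = ppcShl(Lo, Amt);
}

int main() {
  uint32_t Lo, Hi;
  shlParts(0x89abcdefu, 0x01234567u, 8, Lo, Hi);
  assert(Hi == 0x23456789u && Lo == 0xabcdef00u);
  shlParts(0x89abcdefu, 0x01234567u, 40, Lo, Hi);
  assert(Hi == 0xabcdef00u && Lo == 0);
  return 0;
}
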
9286
9287SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9288 EVT VT = Op.getValueType();
9289 SDLoc dl(Op);
9290 unsigned BitWidth = VT.getSizeInBits();
9291 assert(Op.getNumOperands() == 3 &&
9292 VT == Op.getOperand(1).getValueType() &&
9293 "Unexpected SRL!");
9294
9295 // Expand into a bunch of logical ops. Note that these ops
9296 // depend on the PPC behavior for oversized shift amounts.
9297 SDValue Lo = Op.getOperand(0);
9298 SDValue Hi = Op.getOperand(1);
9299 SDValue Amt = Op.getOperand(2);
9300 EVT AmtVT = Amt.getValueType();
9301
9302 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9303 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9304 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9305 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9306 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9307 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9308 DAG.getConstant(-BitWidth, dl, AmtVT));
9309 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9310 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9311 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9312 SDValue OutOps[] = { OutLo, OutHi };
9313 return DAG.getMergeValues(OutOps, dl);
9314}
9315
9316SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9317 SDLoc dl(Op);
9318 EVT VT = Op.getValueType();
9319 unsigned BitWidth = VT.getSizeInBits();
9320 assert(Op.getNumOperands() == 3 &&
9321 VT == Op.getOperand(1).getValueType() &&
9322 "Unexpected SRA!");
9323
9324 // Expand into a bunch of logical ops, followed by a select_cc.
9325 SDValue Lo = Op.getOperand(0);
9326 SDValue Hi = Op.getOperand(1);
9327 SDValue Amt = Op.getOperand(2);
9328 EVT AmtVT = Amt.getValueType();
9329
9330 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9331 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9332 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9333 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9334 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9335 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9336 DAG.getConstant(-BitWidth, dl, AmtVT));
9337 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9338 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9339 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9340 Tmp4, Tmp6, ISD::SETLE);
9341 SDValue OutOps[] = { OutLo, OutHi };
9342 return DAG.getMergeValues(OutOps, dl);
9343}
9344
9345SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9346 SelectionDAG &DAG) const {
9347 SDLoc dl(Op);
9348 EVT VT = Op.getValueType();
9349 unsigned BitWidth = VT.getSizeInBits();
9350
9351 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9352 SDValue X = Op.getOperand(0);
9353 SDValue Y = Op.getOperand(1);
9354 SDValue Z = Op.getOperand(2);
9355 EVT AmtVT = Z.getValueType();
9356
9357 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9358 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9359 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9360 // on PowerPC shift by BW being well defined.
9361 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9362 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9363 SDValue SubZ =
9364 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9365 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9366 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9367 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9368}
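
A sketch of the fshl formula above for i32, added for illustration and assuming the same convention that a shift by 32 yields zero (shlOrZero and srlOrZero are made-up helpers):

#include <cassert>
#include <cstdint>

// Shifts that return zero for an amount of 32, as PPCISD::SHL/SRL do.
static uint32_t shlOrZero(uint32_t V, unsigned Amt) { return Amt < 32 ? V << Amt : 0; }
static uint32_t srlOrZero(uint32_t V, unsigned Amt) { return Amt < 32 ? V >> Amt : 0; }

// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) for BW = 32.
static uint32_t fshl32(uint32_t X, uint32_t Y, unsigned Z) {
  Z &= 31;
  unsigned SubZ = 32 - Z; // equals 32 when Z == 0, which the helpers handle
  return shlOrZero(X, Z) | srlOrZero(Y, SubZ);
}

int main() {
  assert(fshl32(0x12345678u, 0x9abcdef0u, 4) == 0x23456789u);
  assert(fshl32(0x12345678u, 0x9abcdef0u, 0) == 0x12345678u);
  return 0;
}
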
9369
9370//===----------------------------------------------------------------------===//
9371// Vector related lowering.
9372//
9373
9374/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9375/// element size of SplatSize. Cast the result to VT.
9376static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9377 SelectionDAG &DAG, const SDLoc &dl) {
9378 static const MVT VTys[] = { // canonical VT to use for each size.
9379 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9380 };
9381
9382 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9383
9384 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9385 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9386 SplatSize = 1;
9387 Val = 0xFF;
9388 }
9389
9390 EVT CanonicalVT = VTys[SplatSize-1];
9391
9392 // Build a canonical splat for this value.
9393 // Explicitly truncate APInt here, as this API is used with a mix of
9394 // signed and unsigned values.
9395 return DAG.getBitcast(
9396 ReqVT,
9397 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9398}
9399
9400/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9401/// specified intrinsic ID.
9402static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9403 const SDLoc &dl, EVT DestVT = MVT::Other) {
9404 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9405 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9406 DAG.getConstant(IID, dl, MVT::i32), Op);
9407}
9408
9409/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9410/// specified intrinsic ID.
9411static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9412 SelectionDAG &DAG, const SDLoc &dl,
9413 EVT DestVT = MVT::Other) {
9414 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9415 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9416 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9417}
9418
9419/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9420/// specified intrinsic ID.
9421static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9422 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9423 EVT DestVT = MVT::Other) {
9424 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9425 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9426 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9427}
9428
9429/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9430/// amount. The result has the specified value type.
9431static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9432 SelectionDAG &DAG, const SDLoc &dl) {
9433 // Force LHS/RHS to be the right type.
9434 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9435 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9436
9437 int Ops[16];
9438 for (unsigned i = 0; i != 16; ++i)
9439 Ops[i] = i + Amt;
9440 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9441 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9442}
9443
9444/// Do we have an efficient pattern in a .td file for this node?
9445///
9446/// \param V - pointer to the BuildVectorSDNode being matched
9447/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9448///
9449/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9450/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9451/// the opposite is true (expansion is beneficial) are:
9452/// - The node builds a vector out of integers that are not 32 or 64-bits
9453/// - The node builds a vector out of constants
9454/// - The node is a "load-and-splat"
9455/// In all other cases, we will choose to keep the BUILD_VECTOR.
9456static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9457 bool HasDirectMove,
9458 bool HasP8Vector) {
9459 EVT VecVT = V->getValueType(0);
9460 bool RightType = VecVT == MVT::v2f64 ||
9461 (HasP8Vector && VecVT == MVT::v4f32) ||
9462 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9463 if (!RightType)
9464 return false;
9465
9466 bool IsSplat = true;
9467 bool IsLoad = false;
9468 SDValue Op0 = V->getOperand(0);
9469
9470 // This function is called in a block that confirms the node is not a constant
9471 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9472 // different constants.
9473 if (V->isConstant())
9474 return false;
9475 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9476 if (V->getOperand(i).isUndef())
9477 return false;
9478 // We want to expand nodes that represent load-and-splat even if the
9479 // loaded value is a floating point truncation or conversion to int.
9480 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9481 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9482 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9483 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9484 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9485 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9486 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9487 IsLoad = true;
9488 // If the operands are different or the input is not a load and has more
9489 // uses than just this BV node, then it isn't a splat.
9490 if (V->getOperand(i) != Op0 ||
9491 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9492 IsSplat = false;
9493 }
9494 return !(IsSplat && IsLoad);
9495}
9496
9497// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9498SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9499
9500 SDLoc dl(Op);
9501 SDValue Op0 = Op->getOperand(0);
9502
9503 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9504 (Op.getValueType() != MVT::f128))
9505 return SDValue();
9506
9507 SDValue Lo = Op0.getOperand(0);
9508 SDValue Hi = Op0.getOperand(1);
9509 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9510 return SDValue();
9511
9512 if (!Subtarget.isLittleEndian())
9513 std::swap(Lo, Hi);
9514
9515 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9516}
9517
9518static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9519 const SDValue *InputLoad = &Op;
9520 while (InputLoad->getOpcode() == ISD::BITCAST)
9521 InputLoad = &InputLoad->getOperand(0);
9522 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9523      InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9524 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9525 InputLoad = &InputLoad->getOperand(0);
9526 }
9527 if (InputLoad->getOpcode() != ISD::LOAD)
9528 return nullptr;
9529 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9530 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9531}
9532
9533// Convert the argument APFloat to a single precision APFloat if there is no
9534// loss in information during the conversion to single precision APFloat and the
9535// resulting number is not a denormal number. Return true if successful.
9536bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9537 APFloat APFloatToConvert = ArgAPFloat;
9538 bool LosesInfo = true;
9539  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9540 &LosesInfo);
9541 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9542 if (Success)
9543 ArgAPFloat = APFloatToConvert;
9544 return Success;
9545}
9546
9547// Bitcast the argument APInt to a double and convert it to a single precision
9548// APFloat, bitcast the APFloat to an APInt and assign it to the original
9549// argument if there is no loss in information during the conversion from
9550// double to single precision APFloat and the resulting number is not a denormal
9551// number. Return true if successful.
9552bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9553 double DpValue = ArgAPInt.bitsToDouble();
9554 APFloat APFloatDp(DpValue);
9555 bool Success = convertToNonDenormSingle(APFloatDp);
9556 if (Success)
9557 ArgAPInt = APFloatDp.bitcastToAPInt();
9558 return Success;
9559}
9560
9561// Nondestructive check for convertToNonDenormSingle.
9562bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9563 // Only convert if it loses info, since XXSPLTIDP should
9564 // handle the other case.
9565 APFloat APFloatToConvert = ArgAPFloat;
9566 bool LosesInfo = true;
9567  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9568 &LosesInfo);
9569
9570 return (!LosesInfo && !APFloatToConvert.isDenormal());
9571}
9572
9573static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9574 unsigned &Opcode) {
9575 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9576 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9577 return false;
9578
9579 EVT Ty = Op->getValueType(0);
9580 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9581 // as we cannot handle extending loads for these types.
9582 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9583 ISD::isNON_EXTLoad(InputNode))
9584 return true;
9585
9586 EVT MemVT = InputNode->getMemoryVT();
9587 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9588 // memory VT is the same vector element VT type.
9589 // The loads feeding into the v8i16 and v16i8 types will be extending because
9590 // scalar i8/i16 are not legal types.
9591 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9592 (MemVT == Ty.getVectorElementType()))
9593 return true;
9594
9595 if (Ty == MVT::v2i64) {
9596 // Check the extend type, when the input type is i32, and the output vector
9597 // type is v2i64.
9598 if (MemVT == MVT::i32) {
9599 if (ISD::isZEXTLoad(InputNode))
9600 Opcode = PPCISD::ZEXT_LD_SPLAT;
9601 if (ISD::isSEXTLoad(InputNode))
9602 Opcode = PPCISD::SEXT_LD_SPLAT;
9603 }
9604 return true;
9605 }
9606 return false;
9607}
9608
9609// If this is a case we can't handle, return null and let the default
9610// expansion code take care of it. If we CAN select this case, and if it
9611// selects to a single instruction, return Op. Otherwise, if we can codegen
9612// this case more efficiently than a constant pool load, lower it to the
9613// sequence of ops that should be used.
9614SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9615 SelectionDAG &DAG) const {
9616 SDLoc dl(Op);
9617 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9618 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9619
9620 // Check if this is a splat of a constant value.
9621 APInt APSplatBits, APSplatUndef;
9622 unsigned SplatBitSize;
9623 bool HasAnyUndefs;
9624 bool BVNIsConstantSplat =
9625 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9626 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9627
9628 // If it is a splat of a double, check if we can shrink it to a 32 bit
9629 // non-denormal float which when converted back to double gives us the same
9630 // double. This is to exploit the XXSPLTIDP instruction.
9631 // If we lose precision, we use XXSPLTI32DX.
9632 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9633 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9634 // Check the type first to short-circuit so we don't modify APSplatBits if
9635 // this block isn't executed.
9636 if ((Op->getValueType(0) == MVT::v2f64) &&
9637 convertToNonDenormSingle(APSplatBits)) {
9638 SDValue SplatNode = DAG.getNode(
9639 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9640 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9641 return DAG.getBitcast(Op.getValueType(), SplatNode);
9642 } else {
9643 // We may lose precision, so we have to use XXSPLTI32DX.
9644
9645 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9646 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9647 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9648
9649 if (!Hi || !Lo)
9650 // If either the Hi or the Lo word is 0, generate XXLXOR to set the base to 0.
9651 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9652
9653 if (Hi)
9654 SplatNode = DAG.getNode(
9655 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9656 DAG.getTargetConstant(0, dl, MVT::i32),
9657 DAG.getTargetConstant(Hi, dl, MVT::i32));
9658
9659 if (Lo)
9660 SplatNode =
9661 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9662 DAG.getTargetConstant(1, dl, MVT::i32),
9663 DAG.getTargetConstant(Lo, dl, MVT::i32));
9664
9665 return DAG.getBitcast(Op.getValueType(), SplatNode);
9666 }
9667 }
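// Worked example (illustrative): a v2f64 splat of 1.0 (0x3FF0000000000000)
// shrinks losslessly to the single-precision pattern 0x3F800000, so one
// XXSPLTI_SP_TO_DP (xxspltidp) suffices. A splat of 0.1 (0x3FB999999999999A)
// cannot be shrunk, so it is built with two XXSPLTI32DX nodes using
// Hi = 0x3FB99999 and Lo = 0x9999999A.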
9668
9669 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9670 unsigned NewOpcode = PPCISD::LD_SPLAT;
9671
9672 // Handle load-and-splat patterns as we have instructions that will do this
9673 // in one go.
9674 if (DAG.isSplatValue(Op, true) &&
9675 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9676 const SDValue *InputLoad = &Op.getOperand(0);
9677 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9678
9679 // If the input load is an extending load, it will be an i32 -> i64
9680 // extending load and isValidSplatLoad() will update NewOpcode.
9681 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9682 unsigned ElementSize =
9683 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9684
9685 assert(((ElementSize == 2 * MemorySize)
9686 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9687 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9688 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9689 "Unmatched element size and opcode!\n");
9690
9691 // Checking for a single use of this load, we have to check for vector
9692 // width (128 bits) / ElementSize uses (since each operand of the
9693 // BUILD_VECTOR is a separate use of the value).
9694 unsigned NumUsesOfInputLD = 128 / ElementSize;
9695 for (SDValue BVInOp : Op->ops())
9696 if (BVInOp.isUndef())
9697 NumUsesOfInputLD--;
9698
9699 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9700 // The cases below should also arise for "lfiwzx/lfiwax + LE target + index
9701 // 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9702 // 15", but isValidSplatLoad() currently only returns true when the
9703 // element at index 0 is a load (not undef), so we will not get into
9704 // trouble for these cases.
9705 //
9706 // case 1 - lfiwzx/lfiwax
9707 // 1.1: load result is i32 and is sign/zero extend to i64;
9708 // 1.2: build a v2i64 vector type with above loaded value;
9709 // 1.3: the vector has only one value at index 0, others are all undef;
9710 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9711 if (NumUsesOfInputLD == 1 &&
9712 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9713 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9714 Subtarget.hasLFIWAX()))
9715 return SDValue();
9716
9717 // case 2 - lxvr[hb]x
9718 // 2.1: load result is at most i16;
9719 // 2.2: build a vector with above loaded value;
9720 // 2.3: the vector has only one value at index 0, others are all undef;
9721 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9722 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9723 Subtarget.isISA3_1() && ElementSize <= 16)
9724 return SDValue();
9725
9726 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9727 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9728 Subtarget.hasVSX()) {
9729 SDValue Ops[] = {
9730 LD->getChain(), // Chain
9731 LD->getBasePtr(), // Ptr
9732 DAG.getValueType(Op.getValueType()) // VT
9733 };
9734 SDValue LdSplt = DAG.getMemIntrinsicNode(
9735 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9736 LD->getMemoryVT(), LD->getMemOperand());
9737 // Replace all uses of the output chain of the original load with the
9738 // output chain of the new load.
9739 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9740 LdSplt.getValue(1));
9741 return LdSplt;
9742 }
9743 }
9744
9745 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9746 // 32-bits can be lowered to VSX instructions under certain conditions.
9747 // Without VSX, there is no pattern more efficient than expanding the node.
9748 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9749 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9750 Subtarget.hasP8Vector()))
9751 return Op;
9752 return SDValue();
9753 }
9754
9755 uint64_t SplatBits = APSplatBits.getZExtValue();
9756 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9757 unsigned SplatSize = SplatBitSize / 8;
9758
9759 // First, handle single instruction cases.
9760
9761 // All zeros?
9762 if (SplatBits == 0) {
9763 // Canonicalize all zero vectors to be v4i32.
9764 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9765 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9766 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9767 }
9768 return Op;
9769 }
9770
9771 // We have XXSPLTIW for constant splats four bytes wide.
9772 // Since the vector length is a multiple of 4 bytes, 2-byte splats can be replaced
9773 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9774 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9775 // turned into a 4-byte splat of 0xABABABAB.
9776 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9777 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9778 Op.getValueType(), DAG, dl);
9779
9780 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9781 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9782 dl);
9783
9784 // We have XXSPLTIB for constant splats one byte wide.
9785 if (Subtarget.hasP9Vector() && SplatSize == 1)
9786 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9787 dl);
9788
9789 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9790 int32_t SextVal = SignExtend32(SplatBits, SplatBitSize);
9791 if (SextVal >= -16 && SextVal <= 15)
9792 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9793 dl);
9794
9795 // Two instruction sequences.
9796
9797 // If this value is in the range [-32,30] and is even, use:
9798 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9799 // If this value is in the range [17,31] and is odd, use:
9800 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9801 // If this value is in the range [-31,-17] and is odd, use:
9802 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9803 // Note the last two are three-instruction sequences.
9804 if (SextVal >= -32 && SextVal <= 31) {
9805 // To avoid having these optimizations undone by constant folding,
9806 // we convert to a pseudo that will be expanded later into one of
9807 // the above forms.
9808 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9809 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9810 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9811 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9812 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9813 if (VT == Op.getValueType())
9814 return RetVal;
9815 else
9816 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9817 }
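// Worked example (illustrative): SextVal == 22 becomes vspltisw 11 followed by
// an add of the splat to itself (11 + 11 == 22); SextVal == 27 becomes
// vspltisw 11 and vspltisw -16 followed by a subtract (11 - (-16) == 27).
// The VADD_SPLAT pseudo defers this expansion so constant folding cannot
// undo it.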
9818
9819 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9820 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9821 // for fneg/fabs.
9822 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9823 // Make -1 and vspltisw -1:
9824 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9825
9826 // Make the VSLW intrinsic, computing 0x8000_0000.
9827 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9828 OnesV, DAG, dl);
9829
9830 // xor by OnesV to invert it.
9831 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9832 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9833 }
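// Worked example (illustrative): vspltisw -1 makes each word 0xFFFFFFFF;
// vslw with that same vector shifts each word left by 31 (only the low 5 bits
// of the shift amount are used), giving 0x80000000; XORing with the all-ones
// vector then yields 0x7FFFFFFF in every word.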
9834
9835 // Check to see if this is a wide variety of vsplti*, binop self cases.
9836 static const signed char SplatCsts[] = {
9837 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9838 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9839 };
9840
9841 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9842 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9843 // ambiguous cases (e.g. formation of 0x8000_0000); -1 is listed first there.
9844 int i = SplatCsts[idx];
9845
9846 // Figure out what shift amount will be used by altivec if shifted by i in
9847 // this splat size.
9848 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9849
9850 // vsplti + shl self.
9851 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9852 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9853 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9854 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9855 Intrinsic::ppc_altivec_vslw
9856 };
9857 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9858 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9859 }
9860
9861 // vsplti + srl self.
9862 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9863 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9864 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9865 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9866 Intrinsic::ppc_altivec_vsrw
9867 };
9868 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9869 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9870 }
9871
9872 // vsplti + rol self.
9873 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9874 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9875 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9876 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9877 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9878 Intrinsic::ppc_altivec_vrlw
9879 };
9880 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9881 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9882 }
9883
9884 // t = vsplti c, result = vsldoi t, t, 1
9885 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9886 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9887 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9888 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9889 }
9890 // t = vsplti c, result = vsldoi t, t, 2
9891 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9892 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9893 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9894 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9895 }
9896 // t = vsplti c, result = vsldoi t, t, 3
9897 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9898 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9899 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9900 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9901 }
9902 }
9903
9904 return SDValue();
9905}
9906
9907/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9908/// the specified operations to build the shuffle.
9909static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9910 SDValue RHS, SelectionDAG &DAG,
9911 const SDLoc &dl) {
9912 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9913 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9914 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9915
9916 enum {
9917 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9918 OP_VMRGHW,
9919 OP_VMRGLW,
9920 OP_VSPLTISW0,
9921 OP_VSPLTISW1,
9922 OP_VSPLTISW2,
9923 OP_VSPLTISW3,
9924 OP_VSLDOI4,
9925 OP_VSLDOI8,
9926 OP_VSLDOI12
9927 };
9928
9929 if (OpNum == OP_COPY) {
9930 if (LHSID == (1*9+2)*9+3) return LHS;
9931 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9932 return RHS;
9933 }
9934
9935 SDValue OpLHS, OpRHS;
9936 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9937 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9938
9939 int ShufIdxs[16];
9940 switch (OpNum) {
9941 default: llvm_unreachable("Unknown i32 permute!");
9942 case OP_VMRGHW:
9943 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9944 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9945 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9946 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9947 break;
9948 case OP_VMRGLW:
9949 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9950 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9951 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9952 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9953 break;
9954 case OP_VSPLTISW0:
9955 for (unsigned i = 0; i != 16; ++i)
9956 ShufIdxs[i] = (i&3)+0;
9957 break;
9958 case OP_VSPLTISW1:
9959 for (unsigned i = 0; i != 16; ++i)
9960 ShufIdxs[i] = (i&3)+4;
9961 break;
9962 case OP_VSPLTISW2:
9963 for (unsigned i = 0; i != 16; ++i)
9964 ShufIdxs[i] = (i&3)+8;
9965 break;
9966 case OP_VSPLTISW3:
9967 for (unsigned i = 0; i != 16; ++i)
9968 ShufIdxs[i] = (i&3)+12;
9969 break;
9970 case OP_VSLDOI4:
9971 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9972 case OP_VSLDOI8:
9973 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9974 case OP_VSLDOI12:
9975 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9976 }
9977 EVT VT = OpLHS.getValueType();
9978 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9979 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9980 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9981 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9982}
9983
9984/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9985/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9986/// SDValue.
9987SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9988 SelectionDAG &DAG) const {
9989 const unsigned BytesInVector = 16;
9990 bool IsLE = Subtarget.isLittleEndian();
9991 SDLoc dl(N);
9992 SDValue V1 = N->getOperand(0);
9993 SDValue V2 = N->getOperand(1);
9994 unsigned ShiftElts = 0, InsertAtByte = 0;
9995 bool Swap = false;
9996
9997 // Shifts required to get the byte we want at element 7.
9998 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9999 0, 15, 14, 13, 12, 11, 10, 9};
10000 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10001 1, 2, 3, 4, 5, 6, 7, 8};
10002
10003 ArrayRef<int> Mask = N->getMask();
10004 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10005
10006 // For each mask element, find out if we're just inserting something
10007 // from V2 into V1 or vice versa.
10008 // Possible permutations inserting an element from V2 into V1:
10009 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10010 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10011 // ...
10012 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10013 // Inserting from V1 into V2 will be similar, except mask range will be
10014 // [16,31].
10015
10016 bool FoundCandidate = false;
10017 // If both vector operands for the shuffle are the same vector, the mask
10018 // will contain only elements from the first one and the second one will be
10019 // undef.
10020 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10021 // Go through the mask of bytes to find an element that's being moved
10022 // from one vector to the other.
10023 for (unsigned i = 0; i < BytesInVector; ++i) {
10024 unsigned CurrentElement = Mask[i];
10025 // If 2nd operand is undefined, we should only look for element 7 in the
10026 // Mask.
10027 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10028 continue;
10029
10030 bool OtherElementsInOrder = true;
10031 // Examine the other elements in the Mask to see if they're in original
10032 // order.
10033 for (unsigned j = 0; j < BytesInVector; ++j) {
10034 if (j == i)
10035 continue;
10036 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
10037 // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
10038 // in which case we assume we're always picking from the 1st operand.
10039 int MaskOffset =
10040 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10041 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10042 OtherElementsInOrder = false;
10043 break;
10044 }
10045 }
10046 // If other elements are in original order, we record the number of shifts
10047 // we need to get the element we want into element 7. Also record which byte
10048 // in the vector we should insert into.
10049 if (OtherElementsInOrder) {
10050 // If 2nd operand is undefined, we assume no shifts and no swapping.
10051 if (V2.isUndef()) {
10052 ShiftElts = 0;
10053 Swap = false;
10054 } else {
10055 // Only need the low 4 bits for the shift because operands will be swapped if CurrentElement is >= 16.
10056 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10057 : BigEndianShifts[CurrentElement & 0xF];
10058 Swap = CurrentElement < BytesInVector;
10059 }
10060 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10061 FoundCandidate = true;
10062 break;
10063 }
10064 }
10065
10066 if (!FoundCandidate)
10067 return SDValue();
10068
10069 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10070 // optionally with VECSHL if shift is required.
10071 if (Swap)
10072 std::swap(V1, V2);
10073 if (V2.isUndef())
10074 V2 = V1;
10075 if (ShiftElts) {
10076 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10077 DAG.getConstant(ShiftElts, dl, MVT::i32));
10078 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10079 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10080 }
10081 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10082 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10083}
10084
10085/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10086/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10087/// SDValue.
10088SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10089 SelectionDAG &DAG) const {
10090 const unsigned NumHalfWords = 8;
10091 const unsigned BytesInVector = NumHalfWords * 2;
10092 // Check that the shuffle is on half-words.
10093 if (!isNByteElemShuffleMask(N, 2, 1))
10094 return SDValue();
10095
10096 bool IsLE = Subtarget.isLittleEndian();
10097 SDLoc dl(N);
10098 SDValue V1 = N->getOperand(0);
10099 SDValue V2 = N->getOperand(1);
10100 unsigned ShiftElts = 0, InsertAtByte = 0;
10101 bool Swap = false;
10102
10103 // Shifts required to get the half-word we want at element 3.
10104 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10105 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10106
10107 uint32_t Mask = 0;
10108 uint32_t OriginalOrderLow = 0x1234567;
10109 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10110 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10111 // 32-bit space, only need 4-bit nibbles per element.
10112 for (unsigned i = 0; i < NumHalfWords; ++i) {
10113 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10114 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10115 }
10116
10117 // For each mask element, find out if we're just inserting something
10118 // from V2 into V1 or vice versa. Possible permutations inserting an element
10119 // from V2 into V1:
10120 // X, 1, 2, 3, 4, 5, 6, 7
10121 // 0, X, 2, 3, 4, 5, 6, 7
10122 // 0, 1, X, 3, 4, 5, 6, 7
10123 // 0, 1, 2, X, 4, 5, 6, 7
10124 // 0, 1, 2, 3, X, 5, 6, 7
10125 // 0, 1, 2, 3, 4, X, 6, 7
10126 // 0, 1, 2, 3, 4, 5, X, 7
10127 // 0, 1, 2, 3, 4, 5, 6, X
10128 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10129
10130 bool FoundCandidate = false;
10131 // Go through the mask of half-words to find an element that's being moved
10132 // from one vector to the other.
10133 for (unsigned i = 0; i < NumHalfWords; ++i) {
10134 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10135 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10136 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10137 uint32_t TargetOrder = 0x0;
10138
10139 // If both vector operands for the shuffle are the same vector, the mask
10140 // will contain only elements from the first one and the second one will be
10141 // undef.
10142 if (V2.isUndef()) {
10143 ShiftElts = 0;
10144 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10145 TargetOrder = OriginalOrderLow;
10146 Swap = false;
10147 // Skip if not the correct element or mask of other elements don't equal
10148 // to our expected order.
10149 if (MaskOneElt == VINSERTHSrcElem &&
10150 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10151 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10152 FoundCandidate = true;
10153 break;
10154 }
10155 } else { // If both operands are defined.
10156 // Target order is [8,15] if the current mask is between [0,7].
10157 TargetOrder =
10158 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10159 // Skip if mask of other elements don't equal our expected order.
10160 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10161 // We only need the last 3 bits for the number of shifts.
10162 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10163 : BigEndianShifts[MaskOneElt & 0x7];
10164 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10165 Swap = MaskOneElt < NumHalfWords;
10166 FoundCandidate = true;
10167 break;
10168 }
10169 }
10170 }
10171
10172 if (!FoundCandidate)
10173 return SDValue();
10174
10175 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10176 // optionally with VECSHL if shift is required.
10177 if (Swap)
10178 std::swap(V1, V2);
10179 if (V2.isUndef())
10180 V2 = V1;
10181 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10182 if (ShiftElts) {
10183 // Double ShiftElts because we're left shifting on v16i8 type.
10184 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10185 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10186 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10187 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10188 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10189 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10190 }
10191 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10192 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10193 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10194 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10195}
10196
10197/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10198/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10199/// return the default SDValue.
10200SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10201 SelectionDAG &DAG) const {
10202 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10203 // to v16i8. Peek through the bitcasts to get the actual operands.
10204 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10205 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10206
10207 auto ShuffleMask = SVN->getMask();
10208 SDValue VecShuffle(SVN, 0);
10209 SDLoc DL(SVN);
10210
10211 // Check that we have a four byte shuffle.
10212 if (!isNByteElemShuffleMask(SVN, 4, 1))
10213 return SDValue();
10214
10215 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10216 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10217 std::swap(LHS, RHS);
10218 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10219 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10220 if (!CommutedSV)
10221 return SDValue();
10222 ShuffleMask = CommutedSV->getMask();
10223 }
10224
10225 // Ensure that the RHS is a vector of constants.
10226 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10227 if (!BVN)
10228 return SDValue();
10229
10230 // Check if RHS is a splat of 4-bytes (or smaller).
10231 APInt APSplatValue, APSplatUndef;
10232 unsigned SplatBitSize;
10233 bool HasAnyUndefs;
10234 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10235 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10236 SplatBitSize > 32)
10237 return SDValue();
10238
10239 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10240 // The instruction splats a constant C into two words of the source vector
10241 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10242 // Thus we check that the shuffle mask is the equivalent of
10243 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10244 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10245 // within each word are consecutive, so we only need to check the first byte.
10246 SDValue Index;
10247 bool IsLE = Subtarget.isLittleEndian();
10248 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10249 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10250 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10251 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10252 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10253 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10254 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10255 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10256 else
10257 return SDValue();
10258
10259 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10260 // for XXSPLTI32DX.
10261 unsigned SplatVal = APSplatValue.getZExtValue();
10262 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10263 SplatVal |= (SplatVal << SplatBitSize);
10264
10265 SDValue SplatNode = DAG.getNode(
10266 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10267 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10268 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10269}
10270
10271/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10272/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10273/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10274/// i.e (or (shl x, C1), (srl x, 128-C1)).
10275SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10276 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10277 assert(Op.getValueType() == MVT::v1i128 &&
10278 "Only set v1i128 as custom, other type shouldn't reach here!");
10279 SDLoc dl(Op);
10280 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10281 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10282 unsigned SHLAmt = N1.getConstantOperandVal(0);
10283 if (SHLAmt % 8 == 0) {
10284 std::array<int, 16> Mask;
10285 std::iota(Mask.begin(), Mask.end(), 0);
10286 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10287 if (SDValue Shuffle =
10288 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10289 DAG.getUNDEF(MVT::v16i8), Mask))
10290 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10291 }
10292 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10293 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10294 DAG.getConstant(SHLAmt, dl, MVT::i32));
10295 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10296 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10297 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10298 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10299}
10300
10301/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10302/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10303/// return the code it can be lowered into. Worst case, it can always be
10304/// lowered into a vperm.
10305SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10306 SelectionDAG &DAG) const {
10307 SDLoc dl(Op);
10308 SDValue V1 = Op.getOperand(0);
10309 SDValue V2 = Op.getOperand(1);
10310 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10311
10312 // Any nodes that were combined in the target-independent combiner prior
10313 // to vector legalization will not be sent to the target combine. Try to
10314 // combine it here.
10315 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10316 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10317 return NewShuffle;
10318 Op = NewShuffle;
10319 SVOp = cast<ShuffleVectorSDNode>(Op);
10320 V1 = Op.getOperand(0);
10321 V2 = Op.getOperand(1);
10322 }
10323 EVT VT = Op.getValueType();
10324 bool isLittleEndian = Subtarget.isLittleEndian();
10325
10326 unsigned ShiftElts, InsertAtByte;
10327 bool Swap = false;
10328
10329 // If this is a load-and-splat, we can do that with a single instruction
10330 // in some cases. However if the load has multiple uses, we don't want to
10331 // combine it because that will just produce multiple loads.
10332 bool IsPermutedLoad = false;
10333 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10334 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10335 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10336 InputLoad->hasOneUse()) {
10337 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10338 int SplatIdx =
10339 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10340
10341 // The splat index for permuted loads will be in the left half of the vector
10342 // which is strictly wider than the loaded value by 8 bytes. So we need to
10343 // adjust the splat index to point to the correct address in memory.
10344 if (IsPermutedLoad) {
10345 assert((isLittleEndian || IsFourByte) &&
10346 "Unexpected size for permuted load on big endian target");
10347 SplatIdx += IsFourByte ? 2 : 1;
10348 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10349 "Splat of a value outside of the loaded memory");
10350 }
10351
10352 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10353 // For 4-byte load-and-splat, we need Power9.
10354 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10355 uint64_t Offset = 0;
10356 if (IsFourByte)
10357 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10358 else
10359 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10360
10361 // If the width of the load is the same as the width of the splat,
10362 // loading with an offset would load the wrong memory.
10363 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10364 Offset = 0;
10365
10366 SDValue BasePtr = LD->getBasePtr();
10367 if (Offset != 0)
10368 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10369 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10370 SDValue Ops[] = {
10371 LD->getChain(), // Chain
10372 BasePtr, // BasePtr
10373 DAG.getValueType(Op.getValueType()) // VT
10374 };
10375 SDVTList VTL =
10376 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10377 SDValue LdSplt =
10378 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10379 Ops, LD->getMemoryVT(), LD->getMemOperand());
10380 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10381 if (LdSplt.getValueType() != SVOp->getValueType(0))
10382 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10383 return LdSplt;
10384 }
10385 }
10386
10387 // All v2i64 and v2f64 shuffles are legal
10388 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10389 return Op;
10390
10391 if (Subtarget.hasP9Vector() &&
10392 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10393 isLittleEndian)) {
10394 if (V2.isUndef())
10395 V2 = V1;
10396 else if (Swap)
10397 std::swap(V1, V2);
10398 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10399 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10400 if (ShiftElts) {
10401 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10402 DAG.getConstant(ShiftElts, dl, MVT::i32));
10403 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10404 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10405 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10406 }
10407 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10408 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10409 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10410 }
10411
10412 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10413 SDValue SplatInsertNode;
10414 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10415 return SplatInsertNode;
10416 }
10417
10418 if (Subtarget.hasP9Altivec()) {
10419 SDValue NewISDNode;
10420 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10421 return NewISDNode;
10422
10423 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10424 return NewISDNode;
10425 }
10426
10427 if (Subtarget.hasVSX() &&
10428 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10429 if (Swap)
10430 std::swap(V1, V2);
10431 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10432 SDValue Conv2 =
10433 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10434
10435 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10436 DAG.getConstant(ShiftElts, dl, MVT::i32));
10437 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10438 }
10439
10440 if (Subtarget.hasVSX() &&
10441 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10442 if (Swap)
10443 std::swap(V1, V2);
10444 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10445 SDValue Conv2 =
10446 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10447
10448 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10449 DAG.getConstant(ShiftElts, dl, MVT::i32));
10450 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10451 }
10452
10453 if (Subtarget.hasP9Vector()) {
10454 if (PPC::isXXBRHShuffleMask(SVOp)) {
10455 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10456 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10457 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10458 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10459 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10460 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10461 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10462 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10463 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10464 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10465 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10466 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10467 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10468 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10469 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10470 }
10471 }
10472
10473 if (Subtarget.hasVSX()) {
10474 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10475 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10476
10477 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10478 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10479 DAG.getConstant(SplatIdx, dl, MVT::i32));
10480 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10481 }
10482
10483 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10484 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10485 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10486 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10487 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10488 }
10489 }
10490
10491 // Cases that are handled by instructions that take permute immediates
10492 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10493 // selected by the instruction selector.
10494 if (V2.isUndef()) {
10495 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10496 PPC::isSplatShuffleMask(SVOp, 2) ||
10497 PPC::isSplatShuffleMask(SVOp, 4) ||
10498 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10499 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10500 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10501 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10502 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10503 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10504 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10505 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10506 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10507 (Subtarget.hasP8Altivec() && (
10508 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10509 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10510 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10511 return Op;
10512 }
10513 }
10514
10515 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10516 // and produce a fixed permutation. If any of these match, do not lower to
10517 // VPERM.
10518 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10519 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10520 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10521 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10522 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10523 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10524 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10525 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10526 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10527 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10528 (Subtarget.hasP8Altivec() && (
10529 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10530 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10531 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10532 return Op;
10533
10534 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10535 // perfect shuffle table to emit an optimal matching sequence.
10536 ArrayRef<int> PermMask = SVOp->getMask();
10537
10538 if (!DisablePerfectShuffle && !isLittleEndian) {
10539 unsigned PFIndexes[4];
10540 bool isFourElementShuffle = true;
10541 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10542 ++i) { // Element number
10543 unsigned EltNo = 8; // Start out undef.
10544 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10545 if (PermMask[i * 4 + j] < 0)
10546 continue; // Undef, ignore it.
10547
10548 unsigned ByteSource = PermMask[i * 4 + j];
10549 if ((ByteSource & 3) != j) {
10550 isFourElementShuffle = false;
10551 break;
10552 }
10553
10554 if (EltNo == 8) {
10555 EltNo = ByteSource / 4;
10556 } else if (EltNo != ByteSource / 4) {
10557 isFourElementShuffle = false;
10558 break;
10559 }
10560 }
10561 PFIndexes[i] = EltNo;
10562 }
10563
10564 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10565 // perfect shuffle vector to determine if it is cost effective to do this as
10566 // discrete instructions, or whether we should use a vperm.
10567 // For now, we skip this for little endian until such time as we have a
10568 // little-endian perfect shuffle table.
10569 if (isFourElementShuffle) {
10570 // Compute the index in the perfect shuffle table.
10571 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10572 PFIndexes[2] * 9 + PFIndexes[3];
10573
10574 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10575 unsigned Cost = (PFEntry >> 30);
10576
10577 // Determining when to avoid vperm is tricky. Many things affect the cost
10578 // of vperm, particularly how many times the perm mask needs to be
10579 // computed. For example, if the perm mask can be hoisted out of a loop or
10580 // is already used (perhaps because there are multiple permutes with the
10581 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10582 // permute mask out of the loop requires an extra register.
10583 //
10584 // As a compromise, we only emit discrete instructions if the shuffle can
10585 // be generated in 3 or fewer operations. When we have loop information
10586 // available, if this block is within a loop, we should avoid using vperm
10587 // for 3-operation perms and use a constant pool load instead.
10588 if (Cost < 3)
10589 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10590 }
10591 }
10592
10593 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10594 // vector that will get spilled to the constant pool.
10595 if (V2.isUndef()) V2 = V1;
10596
10597 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10598}
10599
10600SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10601 ArrayRef<int> PermMask, EVT VT,
10602 SDValue V1, SDValue V2) const {
10603 unsigned Opcode = PPCISD::VPERM;
10604 EVT ValType = V1.getValueType();
10605 SDLoc dl(Op);
10606 bool NeedSwap = false;
10607 bool isLittleEndian = Subtarget.isLittleEndian();
10608 bool isPPC64 = Subtarget.isPPC64();
10609
10610 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10611 (V1->hasOneUse() || V2->hasOneUse())) {
10612 LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - using "
10613 "XXPERM instead\n");
10614 Opcode = PPCISD::XXPERM;
10615
10616 // The second input to XXPERM is also an output so if the second input has
10617 // multiple uses then copying is necessary, as a result we want the
10618 // single-use operand to be used as the second input to prevent copying.
10619 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10620 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10621 std::swap(V1, V2);
10622 NeedSwap = !NeedSwap;
10623 }
10624 }
10625
10626 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10627 // that it is in input element units, not in bytes. Convert now.
10628
10629 // For little endian, the order of the input vectors is reversed, and
10630 // the permutation mask is complemented with respect to 31. This is
10631 // necessary to produce proper semantics with the big-endian-based vperm
10632 // instruction.
10633 EVT EltVT = V1.getValueType().getVectorElementType();
10634 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10635
10636 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10637 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10638
10639 /*
10640 Vectors will be appended like so: [ V1 | V2 ]
10641 XXSWAPD on V1:
10642 [ A | B | C | D ] -> [ C | D | A | B ]
10643 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10644 i.e. index of A, B += 8, and index of C, D -= 8.
10645 XXSWAPD on V2:
10646 [ E | F | G | H ] -> [ G | H | E | F ]
10647 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10648 i.e. index of E, F += 8, index of G, H -= 8
10649 Swap V1 and V2:
10650 [ V1 | V2 ] -> [ V2 | V1 ]
10651 0-15 16-31 0-15 16-31
10652 i.e. index of V1 += 16, index of V2 -= 16
10653 */
10654
10655 SmallVector<SDValue, 16> ResultMask;
10656 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10657 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10658
10659 if (V1HasXXSWAPD) {
10660 if (SrcElt < 8)
10661 SrcElt += 8;
10662 else if (SrcElt < 16)
10663 SrcElt -= 8;
10664 }
10665 if (V2HasXXSWAPD) {
10666 if (SrcElt > 23)
10667 SrcElt -= 8;
10668 else if (SrcElt > 15)
10669 SrcElt += 8;
10670 }
10671 if (NeedSwap) {
10672 if (SrcElt < 16)
10673 SrcElt += 16;
10674 else
10675 SrcElt -= 16;
10676 }
10677 for (unsigned j = 0; j != BytesPerElement; ++j)
10678 if (isLittleEndian)
10679 ResultMask.push_back(
10680 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10681 else
10682 ResultMask.push_back(
10683 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10684 }
10685
10686 if (V1HasXXSWAPD) {
10687 dl = SDLoc(V1->getOperand(0));
10688 V1 = V1->getOperand(0)->getOperand(1);
10689 }
10690 if (V2HasXXSWAPD) {
10691 dl = SDLoc(V2->getOperand(0));
10692 V2 = V2->getOperand(0)->getOperand(1);
10693 }
10694
10695 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10696 if (ValType != MVT::v2f64)
10697 V1 = DAG.getBitcast(MVT::v2f64, V1);
10698 if (V2.getValueType() != MVT::v2f64)
10699 V2 = DAG.getBitcast(MVT::v2f64, V2);
10700 }
10701
10702 ShufflesHandledWithVPERM++;
10703 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10704 LLVM_DEBUG({
10705 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10706 if (Opcode == PPCISD::XXPERM) {
10707 dbgs() << "Emitting an XXPERM for the following shuffle:\n";
10708 } else {
10709 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10710 }
10711 SVOp->dump();
10712 dbgs() << "With the following permute control vector:\n";
10713 VPermMask.dump();
10714 });
10715
10716 if (Opcode == PPCISD::XXPERM)
10717 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10718
10719 // For little endian we only need to swap the two input vectors here;
10720 // the permute mask above was already computed with the LE complement.
10721 if (isLittleEndian)
10722 std::swap(V1, V2);
10723
10724 SDValue VPERMNode =
10725 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10726
10727 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10728 return VPERMNode;
10729}
10730
10731/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10732/// vector comparison. If it is, return true and fill in Opc/isDot with
10733/// information about the intrinsic.
10734static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10735 bool &isDot, const PPCSubtarget &Subtarget) {
10736 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10737 CompareOpc = -1;
10738 isDot = false;
10739 switch (IntrinsicID) {
10740 default:
10741 return false;
10742 // Comparison predicates.
10743 case Intrinsic::ppc_altivec_vcmpbfp_p:
10744 CompareOpc = 966;
10745 isDot = true;
10746 break;
10747 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10748 CompareOpc = 198;
10749 isDot = true;
10750 break;
10751 case Intrinsic::ppc_altivec_vcmpequb_p:
10752 CompareOpc = 6;
10753 isDot = true;
10754 break;
10755 case Intrinsic::ppc_altivec_vcmpequh_p:
10756 CompareOpc = 70;
10757 isDot = true;
10758 break;
10759 case Intrinsic::ppc_altivec_vcmpequw_p:
10760 CompareOpc = 134;
10761 isDot = true;
10762 break;
10763 case Intrinsic::ppc_altivec_vcmpequd_p:
10764 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10765 CompareOpc = 199;
10766 isDot = true;
10767 } else
10768 return false;
10769 break;
10770 case Intrinsic::ppc_altivec_vcmpneb_p:
10771 case Intrinsic::ppc_altivec_vcmpneh_p:
10772 case Intrinsic::ppc_altivec_vcmpnew_p:
10773 case Intrinsic::ppc_altivec_vcmpnezb_p:
10774 case Intrinsic::ppc_altivec_vcmpnezh_p:
10775 case Intrinsic::ppc_altivec_vcmpnezw_p:
10776 if (Subtarget.hasP9Altivec()) {
10777 switch (IntrinsicID) {
10778 default:
10779 llvm_unreachable("Unknown comparison intrinsic.");
10780 case Intrinsic::ppc_altivec_vcmpneb_p:
10781 CompareOpc = 7;
10782 break;
10783 case Intrinsic::ppc_altivec_vcmpneh_p:
10784 CompareOpc = 71;
10785 break;
10786 case Intrinsic::ppc_altivec_vcmpnew_p:
10787 CompareOpc = 135;
10788 break;
10789 case Intrinsic::ppc_altivec_vcmpnezb_p:
10790 CompareOpc = 263;
10791 break;
10792 case Intrinsic::ppc_altivec_vcmpnezh_p:
10793 CompareOpc = 327;
10794 break;
10795 case Intrinsic::ppc_altivec_vcmpnezw_p:
10796 CompareOpc = 391;
10797 break;
10798 }
10799 isDot = true;
10800 } else
10801 return false;
10802 break;
10803 case Intrinsic::ppc_altivec_vcmpgefp_p:
10804 CompareOpc = 454;
10805 isDot = true;
10806 break;
10807 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10808 CompareOpc = 710;
10809 isDot = true;
10810 break;
10811 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10812 CompareOpc = 774;
10813 isDot = true;
10814 break;
10815 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10816 CompareOpc = 838;
10817 isDot = true;
10818 break;
10819 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10820 CompareOpc = 902;
10821 isDot = true;
10822 break;
10823 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10824 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10825 CompareOpc = 967;
10826 isDot = true;
10827 } else
10828 return false;
10829 break;
10830 case Intrinsic::ppc_altivec_vcmpgtub_p:
10831 CompareOpc = 518;
10832 isDot = true;
10833 break;
10834 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10835 CompareOpc = 582;
10836 isDot = true;
10837 break;
10838 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10839 CompareOpc = 646;
10840 isDot = true;
10841 break;
10842 case Intrinsic::ppc_altivec_vcmpgtud_p:
10843 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10844 CompareOpc = 711;
10845 isDot = true;
10846 } else
10847 return false;
10848 break;
10849
10850 case Intrinsic::ppc_altivec_vcmpequq:
10851 case Intrinsic::ppc_altivec_vcmpgtsq:
10852 case Intrinsic::ppc_altivec_vcmpgtuq:
10853 if (!Subtarget.isISA3_1())
10854 return false;
10855 switch (IntrinsicID) {
10856 default:
10857 llvm_unreachable("Unknown comparison intrinsic.");
10858 case Intrinsic::ppc_altivec_vcmpequq:
10859 CompareOpc = 455;
10860 break;
10861 case Intrinsic::ppc_altivec_vcmpgtsq:
10862 CompareOpc = 903;
10863 break;
10864 case Intrinsic::ppc_altivec_vcmpgtuq:
10865 CompareOpc = 647;
10866 break;
10867 }
10868 break;
10869
10870 // VSX predicate comparisons use the same infrastructure
10871 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10872 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10873 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10874 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10875 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10876 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10877 if (Subtarget.hasVSX()) {
10878 switch (IntrinsicID) {
10879 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10880 CompareOpc = 99;
10881 break;
10882 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10883 CompareOpc = 115;
10884 break;
10885 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10886 CompareOpc = 107;
10887 break;
10888 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10889 CompareOpc = 67;
10890 break;
10891 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10892 CompareOpc = 83;
10893 break;
10894 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10895 CompareOpc = 75;
10896 break;
10897 }
10898 isDot = true;
10899 } else
10900 return false;
10901 break;
10902
10903 // Normal Comparisons.
10904 case Intrinsic::ppc_altivec_vcmpbfp:
10905 CompareOpc = 966;
10906 break;
10907 case Intrinsic::ppc_altivec_vcmpeqfp:
10908 CompareOpc = 198;
10909 break;
10910 case Intrinsic::ppc_altivec_vcmpequb:
10911 CompareOpc = 6;
10912 break;
10913 case Intrinsic::ppc_altivec_vcmpequh:
10914 CompareOpc = 70;
10915 break;
10916 case Intrinsic::ppc_altivec_vcmpequw:
10917 CompareOpc = 134;
10918 break;
10919 case Intrinsic::ppc_altivec_vcmpequd:
10920 if (Subtarget.hasP8Altivec())
10921 CompareOpc = 199;
10922 else
10923 return false;
10924 break;
10925 case Intrinsic::ppc_altivec_vcmpneb:
10926 case Intrinsic::ppc_altivec_vcmpneh:
10927 case Intrinsic::ppc_altivec_vcmpnew:
10928 case Intrinsic::ppc_altivec_vcmpnezb:
10929 case Intrinsic::ppc_altivec_vcmpnezh:
10930 case Intrinsic::ppc_altivec_vcmpnezw:
10931 if (Subtarget.hasP9Altivec())
10932 switch (IntrinsicID) {
10933 default:
10934 llvm_unreachable("Unknown comparison intrinsic.");
10935 case Intrinsic::ppc_altivec_vcmpneb:
10936 CompareOpc = 7;
10937 break;
10938 case Intrinsic::ppc_altivec_vcmpneh:
10939 CompareOpc = 71;
10940 break;
10941 case Intrinsic::ppc_altivec_vcmpnew:
10942 CompareOpc = 135;
10943 break;
10944 case Intrinsic::ppc_altivec_vcmpnezb:
10945 CompareOpc = 263;
10946 break;
10947 case Intrinsic::ppc_altivec_vcmpnezh:
10948 CompareOpc = 327;
10949 break;
10950 case Intrinsic::ppc_altivec_vcmpnezw:
10951 CompareOpc = 391;
10952 break;
10953 }
10954 else
10955 return false;
10956 break;
10957 case Intrinsic::ppc_altivec_vcmpgefp:
10958 CompareOpc = 454;
10959 break;
10960 case Intrinsic::ppc_altivec_vcmpgtfp:
10961 CompareOpc = 710;
10962 break;
10963 case Intrinsic::ppc_altivec_vcmpgtsb:
10964 CompareOpc = 774;
10965 break;
10966 case Intrinsic::ppc_altivec_vcmpgtsh:
10967 CompareOpc = 838;
10968 break;
10969 case Intrinsic::ppc_altivec_vcmpgtsw:
10970 CompareOpc = 902;
10971 break;
10972 case Intrinsic::ppc_altivec_vcmpgtsd:
10973 if (Subtarget.hasP8Altivec())
10974 CompareOpc = 967;
10975 else
10976 return false;
10977 break;
10978 case Intrinsic::ppc_altivec_vcmpgtub:
10979 CompareOpc = 518;
10980 break;
10981 case Intrinsic::ppc_altivec_vcmpgtuh:
10982 CompareOpc = 582;
10983 break;
10984 case Intrinsic::ppc_altivec_vcmpgtuw:
10985 CompareOpc = 646;
10986 break;
10987 case Intrinsic::ppc_altivec_vcmpgtud:
10988 if (Subtarget.hasP8Altivec())
10989 CompareOpc = 711;
10990 else
10991 return false;
10992 break;
10993 case Intrinsic::ppc_altivec_vcmpequq_p:
10994 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10995 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10996 if (!Subtarget.isISA3_1())
10997 return false;
10998 switch (IntrinsicID) {
10999 default:
11000 llvm_unreachable("Unknown comparison intrinsic.");
11001 case Intrinsic::ppc_altivec_vcmpequq_p:
11002 CompareOpc = 455;
11003 break;
11004 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11005 CompareOpc = 903;
11006 break;
11007 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11008 CompareOpc = 647;
11009 break;
11010 }
11011 isDot = true;
11012 break;
11013 }
11014 return true;
11015}
11016
11017/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11018/// lower, do it, otherwise return null.
11019SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11020 SelectionDAG &DAG) const {
11021 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11022
11023 SDLoc dl(Op);
11024
11025 switch (IntrinsicID) {
11026 case Intrinsic::thread_pointer:
11027 // Reads the thread pointer register, used for __builtin_thread_pointer.
11028 if (Subtarget.isPPC64())
11029 return DAG.getRegister(PPC::X13, MVT::i64);
11030 return DAG.getRegister(PPC::R2, MVT::i32);
11031
11032 case Intrinsic::ppc_rldimi: {
11033 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11034 SDValue Src = Op.getOperand(1);
11035 APInt Mask = Op.getConstantOperandAPInt(4);
11036 if (Mask.isZero())
11037 return Op.getOperand(2);
11038 if (Mask.isAllOnes())
11039 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11040 uint64_t SH = Op.getConstantOperandVal(3);
11041 unsigned MB = 0, ME = 0;
11042 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11043 report_fatal_error("invalid rldimi mask!");
11044 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11045 if (ME < 63 - SH) {
11046 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11047 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11048 } else if (ME > 63 - SH) {
11049 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11050 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11051 }
11052 return SDValue(
11053 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11054 {Op.getOperand(2), Src,
11055 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11056 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11057 0);
11058 }
11059
11060 case Intrinsic::ppc_rlwimi: {
11061 APInt Mask = Op.getConstantOperandAPInt(4);
11062 if (Mask.isZero())
11063 return Op.getOperand(2);
11064 if (Mask.isAllOnes())
11065 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11066 Op.getOperand(3));
11067 unsigned MB = 0, ME = 0;
11068 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11069 report_fatal_error("invalid rlwimi mask!");
11070 return SDValue(DAG.getMachineNode(
11071 PPC::RLWIMI, dl, MVT::i32,
11072 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11073 DAG.getTargetConstant(MB, dl, MVT::i32),
11074 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11075 0);
11076 }
11077
11078 case Intrinsic::ppc_rlwnm: {
11079 if (Op.getConstantOperandVal(3) == 0)
11080 return DAG.getConstant(0, dl, MVT::i32);
11081 unsigned MB = 0, ME = 0;
11082 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11083 report_fatal_error("invalid rlwnm mask!");
11084 return SDValue(
11085 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11086 {Op.getOperand(1), Op.getOperand(2),
11087 DAG.getTargetConstant(MB, dl, MVT::i32),
11088 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11089 0);
11090 }
11091
11092 case Intrinsic::ppc_mma_disassemble_acc: {
11093 if (Subtarget.isISAFuture()) {
11094 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11095 SDValue WideVec =
11096 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11097 Op.getOperand(1)),
11098 0);
11099 SmallVector<SDValue, 4> RetOps;
11100 SDValue Value = SDValue(WideVec.getNode(), 0);
11101 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11102
11103 SDValue Extract;
11104 Extract = DAG.getNode(
11105 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11106 Subtarget.isLittleEndian() ? Value2 : Value,
11107 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11108 dl, getPointerTy(DAG.getDataLayout())));
11109 RetOps.push_back(Extract);
11110 Extract = DAG.getNode(
11111 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11112 Subtarget.isLittleEndian() ? Value2 : Value,
11113 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11114 dl, getPointerTy(DAG.getDataLayout())));
11115 RetOps.push_back(Extract);
11116 Extract = DAG.getNode(
11117 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11118 Subtarget.isLittleEndian() ? Value : Value2,
11119 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11120 dl, getPointerTy(DAG.getDataLayout())));
11121 RetOps.push_back(Extract);
11122 Extract = DAG.getNode(
11123 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11124 Subtarget.isLittleEndian() ? Value : Value2,
11125 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11126 dl, getPointerTy(DAG.getDataLayout())));
11127 RetOps.push_back(Extract);
11128 return DAG.getMergeValues(RetOps, dl);
11129 }
11130 [[fallthrough]];
11131 }
11132 case Intrinsic::ppc_vsx_disassemble_pair: {
11133 int NumVecs = 2;
11134 SDValue WideVec = Op.getOperand(1);
11135 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11136 NumVecs = 4;
11137 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11138 }
11139 SmallVector<SDValue, 4> RetOps;
11140 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11141 SDValue Extract = DAG.getNode(
11142 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11143 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11144 : VecNo,
11145 dl, getPointerTy(DAG.getDataLayout())));
11146 RetOps.push_back(Extract);
11147 }
11148 return DAG.getMergeValues(RetOps, dl);
11149 }
11150
11151 case Intrinsic::ppc_mma_xxmfacc:
11152 case Intrinsic::ppc_mma_xxmtacc: {
11153 // Allow pre-isa-future subtargets to lower as normal.
11154 if (!Subtarget.isISAFuture())
11155 return SDValue();
11156 // The intrinsics for xxmtacc and xxmfacc take one argument of
11157 // type v512i1. For future CPUs, the corresponding wacc instruction
11158 // dmxx[inst|extf]dmr512 is always generated for type v512i1, so there is
11159 // no need to produce the xxm[t|f]acc.
11160 SDValue WideVec = Op.getOperand(1);
11161 DAG.ReplaceAllUsesWith(Op, WideVec);
11162 return SDValue();
11163 }
11164
11165 case Intrinsic::ppc_unpack_longdouble: {
11166 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11167 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11168 "Argument of long double unpack must be 0 or 1!");
11169 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11170 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11171 Idx->getValueType(0)));
11172 }
11173
11174 case Intrinsic::ppc_compare_exp_lt:
11175 case Intrinsic::ppc_compare_exp_gt:
11176 case Intrinsic::ppc_compare_exp_eq:
11177 case Intrinsic::ppc_compare_exp_uo: {
11178 unsigned Pred;
11179 switch (IntrinsicID) {
11180 case Intrinsic::ppc_compare_exp_lt:
11181 Pred = PPC::PRED_LT;
11182 break;
11183 case Intrinsic::ppc_compare_exp_gt:
11184 Pred = PPC::PRED_GT;
11185 break;
11186 case Intrinsic::ppc_compare_exp_eq:
11187 Pred = PPC::PRED_EQ;
11188 break;
11189 case Intrinsic::ppc_compare_exp_uo:
11190 Pred = PPC::PRED_UN;
11191 break;
11192 }
11193 return SDValue(
11194 DAG.getMachineNode(
11195 PPC::SELECT_CC_I4, dl, MVT::i32,
11196 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11197 Op.getOperand(1), Op.getOperand(2)),
11198 0),
11199 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11200 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11201 0);
11202 }
11203 case Intrinsic::ppc_test_data_class: {
11204 EVT OpVT = Op.getOperand(1).getValueType();
11205 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11206 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11207 : PPC::XSTSTDCSP);
11208 return SDValue(
11209 DAG.getMachineNode(
11210 PPC::SELECT_CC_I4, dl, MVT::i32,
11211 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11212 Op.getOperand(1)),
11213 0),
11214 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11215 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11216 0);
11217 }
11218 case Intrinsic::ppc_fnmsub: {
11219 EVT VT = Op.getOperand(1).getValueType();
11220 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11221 return DAG.getNode(
11222 ISD::FNEG, dl, VT,
11223 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11224 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11225 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11226 Op.getOperand(2), Op.getOperand(3));
11227 }
11228 case Intrinsic::ppc_convert_f128_to_ppcf128:
11229 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11230 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11231 ? RTLIB::CONVERT_PPCF128_F128
11232 : RTLIB::CONVERT_F128_PPCF128;
11233 MakeLibCallOptions CallOptions;
11234 std::pair<SDValue, SDValue> Result =
11235 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11236 dl, SDValue());
11237 return Result.first;
11238 }
11239 case Intrinsic::ppc_maxfe:
11240 case Intrinsic::ppc_maxfl:
11241 case Intrinsic::ppc_maxfs:
11242 case Intrinsic::ppc_minfe:
11243 case Intrinsic::ppc_minfl:
11244 case Intrinsic::ppc_minfs: {
11245 EVT VT = Op.getValueType();
11246 assert(
11247 all_of(Op->ops().drop_front(4),
11248 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11249 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11250 (void)VT;
11251 ISD::CondCode CC = ISD::SETGT;
11252 if (IntrinsicID == Intrinsic::ppc_minfe ||
11253 IntrinsicID == Intrinsic::ppc_minfl ||
11254 IntrinsicID == Intrinsic::ppc_minfs)
11255 CC = ISD::SETLT;
11256 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11257 SDValue Res = Op.getOperand(I);
11258 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11259 Res =
11260 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11261 }
11262 return Res;
11263 }
11264 }
11265
11266 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11267 // opcode number of the comparison.
11268 int CompareOpc;
11269 bool isDot;
11270 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11271 return SDValue(); // Don't custom lower most intrinsics.
11272
11273 // If this is a non-dot comparison, make the VCMP node and we are done.
11274 if (!isDot) {
11275 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11276 Op.getOperand(1), Op.getOperand(2),
11277 DAG.getConstant(CompareOpc, dl, MVT::i32));
11278 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11279 }
11280
11281 // Create the PPCISD altivec 'dot' comparison node.
11282 SDValue Ops[] = {
11283 Op.getOperand(2), // LHS
11284 Op.getOperand(3), // RHS
11285 DAG.getConstant(CompareOpc, dl, MVT::i32)
11286 };
11287 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11288 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11289
11290 // Unpack the result based on how the target uses it.
11291 unsigned BitNo; // Bit # of CR6.
11292 bool InvertBit; // Invert result?
11293 unsigned Bitx;
11294 unsigned SetOp;
11295 switch (Op.getConstantOperandVal(1)) {
11296 default: // Can't happen, don't crash on invalid number though.
11297 case 0: // Return the value of the EQ bit of CR6.
11298 BitNo = 0;
11299 InvertBit = false;
11300 Bitx = PPC::sub_eq;
11301 SetOp = PPCISD::SETBC;
11302 break;
11303 case 1: // Return the inverted value of the EQ bit of CR6.
11304 BitNo = 0;
11305 InvertBit = true;
11306 Bitx = PPC::sub_eq;
11307 SetOp = PPCISD::SETBCR;
11308 break;
11309 case 2: // Return the value of the LT bit of CR6.
11310 BitNo = 2;
11311 InvertBit = false;
11312 Bitx = PPC::sub_lt;
11313 SetOp = PPCISD::SETBC;
11314 break;
11315 case 3: // Return the inverted value of the LT bit of CR6.
11316 BitNo = 2;
11317 InvertBit = true;
11318 Bitx = PPC::sub_lt;
11319 SetOp = PPCISD::SETBCR;
11320 break;
11321 }
11322
11323 SDValue GlueOp = CompNode.getValue(1);
11324 if (Subtarget.isISA3_1()) {
11325 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11326 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11327 SDValue CRBit =
11328 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11329 CR6Reg, SubRegIdx, GlueOp),
11330 0);
11331 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11332 }
11333
11334 // Now that we have the comparison, emit a copy from the CR to a GPR.
11335 // This is flagged to the above dot comparison.
11336 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11337 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11338
11339 // Shift the bit into the low position.
11340 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11341 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11342 // Isolate the bit.
11343 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11344 DAG.getConstant(1, dl, MVT::i32));
11345
11346 // If we are supposed to, toggle the bit.
11347 if (InvertBit)
11348 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11349 DAG.getConstant(1, dl, MVT::i32));
11350 return Flags;
11351}
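// A brief sketch of the pre-ISA-3.1 CR6 extraction above: MFOCRF copies the
// condition register so that CR6's four bits land in the low byte of the GPR
// (LT at bit 7, GT at 6, EQ at 5, SO at 4, counting from the least significant
// bit). The SRL amount 8 - (3 - BitNo) is therefore 5 for the EQ bit
// (BitNo == 0) and 7 for the LT bit (BitNo == 2), bringing the selected bit
// down to position 0 before it is masked with 1 and, for the inverted
// predicates, flipped with XOR.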
11352
11353SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11354 SelectionDAG &DAG) const {
11355 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11356 // the beginning of the argument list.
11357 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11358 SDLoc DL(Op);
11359 switch (Op.getConstantOperandVal(ArgStart)) {
11360 case Intrinsic::ppc_cfence: {
11361 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11362 SDValue Val = Op.getOperand(ArgStart + 1);
11363 EVT Ty = Val.getValueType();
11364 if (Ty == MVT::i128) {
11365 // FIXME: Testing one of two paired registers is sufficient to guarantee
11366 // ordering?
11367 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11368 }
11369 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11370 return SDValue(
11371 DAG.getMachineNode(
11372 Opcode, DL, MVT::Other,
11373 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11374 Op.getOperand(0)),
11375 0);
11376 }
11377 default:
11378 break;
11379 }
11380 return SDValue();
11381}
11382
11383// Lower scalar BSWAP64 to xxbrd.
11384SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11385 SDLoc dl(Op);
11386 if (!Subtarget.isPPC64())
11387 return Op;
11388 // MTVSRDD
11389 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11390 Op.getOperand(0));
11391 // XXBRD
11392 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11393 // MFVSRD
11394 int VectorIndex = 0;
11395 if (Subtarget.isLittleEndian())
11396 VectorIndex = 1;
11397 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11398 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11399 return Op;
11400}
11401
11402// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11403// compared to a value that is atomically loaded (atomic loads zero-extend).
11404SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11405 SelectionDAG &DAG) const {
11406 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11407 "Expecting an atomic compare-and-swap here.");
11408 SDLoc dl(Op);
11409 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11410 EVT MemVT = AtomicNode->getMemoryVT();
11411 if (MemVT.getSizeInBits() >= 32)
11412 return Op;
11413
11414 SDValue CmpOp = Op.getOperand(2);
11415 // If this is already correctly zero-extended, leave it alone.
11416 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11417 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11418 return Op;
11419
11420 // Clear the high bits of the compare operand.
11421 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11422 SDValue NewCmpOp =
11423 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11424 DAG.getConstant(MaskVal, dl, MVT::i32));
11425
11426 // Replace the existing compare operand with the properly zero-extended one.
11427 SmallVector<SDValue, 4> Ops;
11428 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11429 Ops.push_back(AtomicNode->getOperand(i));
11430 Ops[2] = NewCmpOp;
11431 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11432 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11433 auto NodeTy =
11434 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11435 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11436}
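// Illustrative example of the rewrite above: for a partword compare-and-swap
// such as
//   %res = cmpxchg ptr %p, i8 %cmp, i8 %new monotonic monotonic
// the compare operand %cmp is ANDed with 255 ((1 << 8) - 1) before building
// the partword ATOMIC_CMP_SWAP memory intrinsic node, unless its upper 24 bits
// are already known to be zero, so that it matches the zero-extended value
// produced by the partword atomic load.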
11437
11438SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11439 SelectionDAG &DAG) const {
11440 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11441 EVT MemVT = N->getMemoryVT();
11442 assert(MemVT.getSimpleVT() == MVT::i128 &&
11443 "Expect quadword atomic operations");
11444 SDLoc dl(N);
11445 unsigned Opc = N->getOpcode();
11446 switch (Opc) {
11447 case ISD::ATOMIC_LOAD: {
11448 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11449 // lowered to ppc instructions by pattern matching instruction selector.
11450 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11451 SmallVector<SDValue, 4> Ops{
11452 N->getOperand(0),
11453 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11454 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11455 Ops.push_back(N->getOperand(I));
11456 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11457 Ops, MemVT, N->getMemOperand());
11458 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11459 SDValue ValHi =
11460 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11461 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11462 DAG.getConstant(64, dl, MVT::i32));
11463 SDValue Val =
11464 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11465 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11466 {Val, LoadedVal.getValue(2)});
11467 }
11468 case ISD::ATOMIC_STORE: {
11469 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11470 // lowered to ppc instructions by pattern matching instruction selector.
11471 SDVTList Tys = DAG.getVTList(MVT::Other);
11472 SmallVector<SDValue, 4> Ops{
11473 N->getOperand(0),
11474 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11475 SDValue Val = N->getOperand(1);
11476 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11477 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11478 DAG.getConstant(64, dl, MVT::i32));
11479 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11480 Ops.push_back(ValLo);
11481 Ops.push_back(ValHi);
11482 Ops.push_back(N->getOperand(2));
11483 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11484 N->getMemOperand());
11485 }
11486 default:
11487 llvm_unreachable("Unexpected atomic opcode");
11488 }
11489}
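// Sketch of the quadword expansion above: an atomic i128 load becomes a call
// to the ppc_atomic_load_i128 intrinsic yielding two i64 halves, which are
// zero-extended and recombined as Lo | (Hi << 64); an atomic i128 store splits
// the value into Lo = trunc(Val) and Hi = trunc(Val >> 64) and feeds them to
// ppc_atomic_store_i128.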
11490
11491 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11492 SelectionDAG &DAG,
11493 const PPCSubtarget &Subtarget) {
11494 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11495
11496 enum DataClassMask {
11497 DC_NAN = 1 << 6,
11498 DC_NEG_INF = 1 << 4,
11499 DC_POS_INF = 1 << 5,
11500 DC_NEG_ZERO = 1 << 2,
11501 DC_POS_ZERO = 1 << 3,
11502 DC_NEG_SUBNORM = 1,
11503 DC_POS_SUBNORM = 1 << 1,
11504 };
11505
11506 EVT VT = Op.getValueType();
11507
11508 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11509 : VT == MVT::f64 ? PPC::XSTSTDCDP
11510 : PPC::XSTSTDCSP;
11511
11512 if (Mask == fcAllFlags)
11513 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11514 if (Mask == 0)
11515 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11516
11517 // When it is cheaper or necessary, test the inverted set of flags instead.
11518 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11519 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11520 return DAG.getNOT(Dl, Rev, MVT::i1);
11521 }
11522
11523 // Power doesn't support testing whether a value is 'normal'. Test the rest
11524 // first, and test if it's 'not not-normal' with expected sign.
11525 if (Mask & fcNormal) {
11526 SDValue Rev(DAG.getMachineNode(
11527 TestOp, Dl, MVT::i32,
11528 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11529 DC_NEG_ZERO | DC_POS_ZERO |
11530 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11531 Dl, MVT::i32),
11532 Op),
11533 0);
11534 // The sign is stored in CR bit 0 (LT); the result is in CR bit 2 (EQ).
11535 SDValue Sign(
11536 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11537 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11538 0);
11539 SDValue Normal(DAG.getNOT(
11540 Dl,
11541 SDValue(DAG.getMachineNode(
11542 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11543 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11544 0),
11545 MVT::i1));
11546 if (Mask & fcPosNormal)
11547 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11548 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11549 if (Mask == fcPosNormal || Mask == fcNegNormal)
11550 return Result;
11551
11552 return DAG.getNode(
11553 ISD::OR, Dl, MVT::i1,
11554 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11555 }
11556
11557 // The instruction doesn't differentiate between signaling and quiet NaNs. Test
11558 // the rest first, then test whether the value 'is NaN and is signaling/quiet'.
11559 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11560 bool IsQuiet = Mask & fcQNan;
11561 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11562
11563 // Quietness is determined by the first bit of the fraction field.
11564 uint64_t QuietMask = 0;
11565 SDValue HighWord;
11566 if (VT == MVT::f128) {
11567 HighWord = DAG.getNode(
11568 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11569 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11570 QuietMask = 0x8000;
11571 } else if (VT == MVT::f64) {
11572 if (Subtarget.isPPC64()) {
11573 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11574 DAG.getBitcast(MVT::i64, Op),
11575 DAG.getConstant(1, Dl, MVT::i32));
11576 } else {
11577 SDValue Vec = DAG.getBitcast(
11578 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11579 HighWord = DAG.getNode(
11580 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11581 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11582 }
11583 QuietMask = 0x80000;
11584 } else if (VT == MVT::f32) {
11585 HighWord = DAG.getBitcast(MVT::i32, Op);
11586 QuietMask = 0x400000;
11587 }
11588 SDValue NanRes = DAG.getSetCC(
11589 Dl, MVT::i1,
11590 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11591 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11592 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11593 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11594 if (Mask == fcQNan || Mask == fcSNan)
11595 return NanRes;
11596
11597 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11598 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11599 NanRes);
11600 }
11601
11602 unsigned NativeMask = 0;
11603 if ((Mask & fcNan) == fcNan)
11604 NativeMask |= DC_NAN;
11605 if (Mask & fcNegInf)
11606 NativeMask |= DC_NEG_INF;
11607 if (Mask & fcPosInf)
11608 NativeMask |= DC_POS_INF;
11609 if (Mask & fcNegZero)
11610 NativeMask |= DC_NEG_ZERO;
11611 if (Mask & fcPosZero)
11612 NativeMask |= DC_POS_ZERO;
11613 if (Mask & fcNegSubnormal)
11614 NativeMask |= DC_NEG_SUBNORM;
11615 if (Mask & fcPosSubnormal)
11616 NativeMask |= DC_POS_SUBNORM;
11617 return SDValue(
11618 DAG.getMachineNode(
11619 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11620 SDValue(DAG.getMachineNode(
11621 TestOp, Dl, MVT::i32,
11622 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11623 0),
11624 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11625 0);
11626}
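// Worked example for the generic path above: a query for fcInf (== fcPosInf |
// fcNegInf) reaches the NativeMask computation and produces
// DC_POS_INF | DC_NEG_INF == 0x30, so a single VSX test-data-class instruction
// with DCMX = 0x30 is emitted and the EQ bit of the resulting CR field holds
// the answer.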
11627
11628SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11629 SelectionDAG &DAG) const {
11630 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11631 SDValue LHS = Op.getOperand(0);
11632 uint64_t RHSC = Op.getConstantOperandVal(1);
11633 SDLoc Dl(Op);
11634 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11635 if (LHS.getValueType() == MVT::ppcf128) {
11636 // The higher part determines the value class.
11637 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11638 DAG.getConstant(1, Dl, MVT::i32));
11639 }
11640
11641 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11642}
11643
11644SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11645 SelectionDAG &DAG) const {
11646 SDLoc dl(Op);
11647
11648 MachineFunction &MF = DAG.getMachineFunction();
11649 SDValue Op0 = Op.getOperand(0);
11650 EVT ValVT = Op0.getValueType();
11651 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11652 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11653 int64_t IntVal = Op.getConstantOperandVal(0);
11654 if (IntVal >= -16 && IntVal <= 15)
11655 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11656 dl);
11657 }
11658
11659 ReuseLoadInfo RLI;
11660 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11661 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11662 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11663 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11664
11665 MachineMemOperand *MMO =
11666 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11667 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11668 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11669 SDValue Bits = DAG.getMemIntrinsicNode(
11670 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11671 MVT::i32, MMO);
11672 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
11673 return Bits.getValue(0);
11674 }
11675
11676 // Create a stack slot that is 16-byte aligned.
11677 MachineFrameInfo &MFI = MF.getFrameInfo();
11678 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11679 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11680 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11681
11682 SDValue Val = Op0;
11683 // P10 hardware store forwarding requires that a single store contains all
11684 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11685 // to avoid load hit store on P10 when running binaries compiled for older
11686 // processors by generating two mergeable scalar stores to forward with the
11687 // vector load.
11688 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11689 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11690 ValVT.getSizeInBits() <= 64) {
11691 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11692 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11693 SDValue ShiftBy = DAG.getConstant(
11694 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11695 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11696 SDValue Plus8 =
11697 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11698 SDValue Store2 =
11699 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11700 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11701 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11702 MachinePointerInfo());
11703 }
11704
11705 // Store the input value into Value#0 of the stack slot.
11706 SDValue Store =
11707 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11708 // Load it out.
11709 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11710}
11711
11712SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11713 SelectionDAG &DAG) const {
11714 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11715 "Should only be called for ISD::INSERT_VECTOR_ELT");
11716
11717 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11718
11719 EVT VT = Op.getValueType();
11720 SDLoc dl(Op);
11721 SDValue V1 = Op.getOperand(0);
11722 SDValue V2 = Op.getOperand(1);
11723
11724 if (VT == MVT::v2f64 && C)
11725 return Op;
11726
11727 if (Subtarget.hasP9Vector()) {
11728 // An f32 load feeding into a v4f32 insert_vector_elt is handled this way
11729 // because on P10, it allows this specific insert_vector_elt load pattern to
11730 // utilize the refactored load and store infrastructure in order to exploit
11731 // prefixed loads.
11732 // On targets with inexpensive direct moves (Power9 and up), a
11733 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11734 // load since a single precision load will involve conversion to double
11735 // precision on the load followed by another conversion to single precision.
11736 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11737 (isa<LoadSDNode>(V2))) {
11738 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11739 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11740 SDValue InsVecElt =
11741 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11742 BitcastLoad, Op.getOperand(2));
11743 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11744 }
11745 }
11746
11747 if (Subtarget.isISA3_1()) {
11748 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11749 return SDValue();
11750 // On P10, we have legal lowering for constant and variable indices for
11751 // all vectors.
11752 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11753 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11754 return Op;
11755 }
11756
11757 // Before P10, we have legal lowering for constant indices but not for
11758 // variable ones.
11759 if (!C)
11760 return SDValue();
11761
11762 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11763 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11764 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11765 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11766 unsigned InsertAtElement = C->getZExtValue();
11767 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11768 if (Subtarget.isLittleEndian()) {
11769 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11770 }
11771 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11772 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11773 }
11774 return Op;
11775}
11776
11777SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11778 SelectionDAG &DAG) const {
11779 SDLoc dl(Op);
11780 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11781 SDValue LoadChain = LN->getChain();
11782 SDValue BasePtr = LN->getBasePtr();
11783 EVT VT = Op.getValueType();
11784
11785 if (VT != MVT::v256i1 && VT != MVT::v512i1)
11786 return Op;
11787
11788 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11789 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11790 // 2 or 4 vsx registers.
11791 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11792 "Type unsupported without MMA");
11793 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11794 "Type unsupported without paired vector support");
11795 Align Alignment = LN->getAlign();
11796 SmallVector<SDValue, 4> Loads;
11797 SmallVector<SDValue, 4> LoadChains;
11798 unsigned NumVecs = VT.getSizeInBits() / 128;
11799 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11800 SDValue Load =
11801 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11802 LN->getPointerInfo().getWithOffset(Idx * 16),
11803 commonAlignment(Alignment, Idx * 16),
11804 LN->getMemOperand()->getFlags(), LN->getAAInfo());
11805 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11806 DAG.getConstant(16, dl, BasePtr.getValueType()));
11807 Loads.push_back(Load);
11808 LoadChains.push_back(Load.getValue(1));
11809 }
11810 if (Subtarget.isLittleEndian()) {
11811 std::reverse(Loads.begin(), Loads.end());
11812 std::reverse(LoadChains.begin(), LoadChains.end());
11813 }
11814 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11815 SDValue Value =
11816 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11817 dl, VT, Loads);
11818 SDValue RetOps[] = {Value, TF};
11819 return DAG.getMergeValues(RetOps, dl);
11820}
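// Shape of the expansion above, for illustration: a v512i1 (MMA accumulator)
// load from address P becomes four v16i8 loads at P+0, P+16, P+32 and P+48,
// reversed on little-endian targets, and then combined with PPCISD::ACC_BUILD;
// a v256i1 pair load uses two such loads and PPCISD::PAIR_BUILD.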
11821
11822SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11823 SelectionDAG &DAG) const {
11824 SDLoc dl(Op);
11825 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11826 SDValue StoreChain = SN->getChain();
11827 SDValue BasePtr = SN->getBasePtr();
11828 SDValue Value = SN->getValue();
11829 SDValue Value2 = SN->getValue();
11830 EVT StoreVT = Value.getValueType();
11831
11832 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11833 return Op;
11834
11835 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11836 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11837 // underlying registers individually.
11838 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11839 "Type unsupported without MMA");
11840 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11841 "Type unsupported without paired vector support");
11842 Align Alignment = SN->getAlign();
11843 SmallVector<SDValue, 4> Stores;
11844 unsigned NumVecs = 2;
11845 if (StoreVT == MVT::v512i1) {
11846 if (Subtarget.isISAFuture()) {
11847 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11848 MachineSDNode *ExtNode = DAG.getMachineNode(
11849 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
11850
11851 Value = SDValue(ExtNode, 0);
11852 Value2 = SDValue(ExtNode, 1);
11853 } else
11854 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11855 NumVecs = 4;
11856 }
11857 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11858 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11859 SDValue Elt;
11860 if (Subtarget.isISAFuture()) {
11861 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11862 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11863 Idx > 1 ? Value2 : Value,
11864 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11865 } else
11866 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11867 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11868
11869 SDValue Store =
11870 DAG.getStore(StoreChain, dl, Elt, BasePtr,
11871 SN->getPointerInfo().getWithOffset(Idx * 16),
11872 commonAlignment(Alignment, Idx * 16),
11873 SN->getMemOperand()->getFlags(), SN->getAAInfo());
11874 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11875 DAG.getConstant(16, dl, BasePtr.getValueType()));
11876 Stores.push_back(Store);
11877 }
11878 SDValue TF = DAG.getTokenFactor(dl, Stores);
11879 return TF;
11880}
11881
11882SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11883 SDLoc dl(Op);
11884 if (Op.getValueType() == MVT::v4i32) {
11885 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11886
11887 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11888 // +16 as shift amt.
11889 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11890 SDValue RHSSwap = // = vrlw RHS, 16
11891 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11892
11893 // Shrinkify inputs to v8i16.
11894 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11895 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11896 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11897
11898 // Low parts multiplied together, generating 32-bit results (we ignore the
11899 // top parts).
11900 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11901 LHS, RHS, DAG, dl, MVT::v4i32);
11902
11903 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11904 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11905 // Shift the high parts up 16 bits.
11906 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11907 Neg16, DAG, dl);
11908 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11909 } else if (Op.getValueType() == MVT::v16i8) {
11910 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11911 bool isLittleEndian = Subtarget.isLittleEndian();
11912
11913 // Multiply the even 8-bit parts, producing 16-bit sums.
11914 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11915 LHS, RHS, DAG, dl, MVT::v8i16);
11916 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11917
11918 // Multiply the odd 8-bit parts, producing 16-bit sums.
11919 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11920 LHS, RHS, DAG, dl, MVT::v8i16);
11921 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11922
11923 // Merge the results together. Because vmuleub and vmuloub are
11924 // instructions with a big-endian bias, we must reverse the
11925 // element numbering and reverse the meaning of "odd" and "even"
11926 // when generating little endian code.
11927 int Ops[16];
11928 for (unsigned i = 0; i != 8; ++i) {
11929 if (isLittleEndian) {
11930 Ops[i*2 ] = 2*i;
11931 Ops[i*2+1] = 2*i+16;
11932 } else {
11933 Ops[i*2 ] = 2*i+1;
11934 Ops[i*2+1] = 2*i+1+16;
11935 }
11936 }
11937 if (isLittleEndian)
11938 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11939 else
11940 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11941 } else {
11942 llvm_unreachable("Unknown mul to lower!");
11943 }
11944}
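// The v4i32 path above is the usual 16 x 16 decomposition: with each 32-bit
// lane written as a = a_hi * 2^16 + a_lo and b = b_hi * 2^16 + b_lo,
//   a * b mod 2^32 = a_lo * b_lo + ((a_lo * b_hi + a_hi * b_lo) << 16).
// vmulouh supplies the a_lo * b_lo products, vmsumuhm on the halfword-swapped
// RHS supplies the two cross products summed per lane, and vslw by the splat
// of -16 (equivalent to 16 modulo the lane width) performs the final shift.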
11945
11946SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11947 bool IsStrict = Op->isStrictFPOpcode();
11948 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11949 !Subtarget.hasP9Vector())
11950 return SDValue();
11951
11952 return Op;
11953}
11954
11955 // Custom lowering for fpext v2f32 to v2f64
11956SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11957
11958 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11959 "Should only be called for ISD::FP_EXTEND");
11960
11961 // FIXME: handle extends from half precision float vectors on P9.
11962 // We only want to custom lower an extend from v2f32 to v2f64.
11963 if (Op.getValueType() != MVT::v2f64 ||
11964 Op.getOperand(0).getValueType() != MVT::v2f32)
11965 return SDValue();
11966
11967 SDLoc dl(Op);
11968 SDValue Op0 = Op.getOperand(0);
11969
11970 switch (Op0.getOpcode()) {
11971 default:
11972 return SDValue();
11973 case ISD::EXTRACT_SUBVECTOR: {
11974 assert(Op0.getNumOperands() == 2 &&
11975 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11976 "Node should have 2 operands with second one being a constant!");
11977
11978 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11979 return SDValue();
11980
11981 // Custom lower is only done for high or low doubleword.
11982 int Idx = Op0.getConstantOperandVal(1);
11983 if (Idx % 2 != 0)
11984 return SDValue();
11985
11986 // Since input is v4f32, at this point Idx is either 0 or 2.
11987 // Shift to get the doubleword position we want.
11988 int DWord = Idx >> 1;
11989
11990 // High and low word positions are different on little endian.
11991 if (Subtarget.isLittleEndian())
11992 DWord ^= 0x1;
11993
11994 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11995 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11996 }
11997 case ISD::FADD:
11998 case ISD::FMUL:
11999 case ISD::FSUB: {
12000 SDValue NewLoad[2];
12001 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12002 // Ensure both inputs are loads.
12003 SDValue LdOp = Op0.getOperand(i);
12004 if (LdOp.getOpcode() != ISD::LOAD)
12005 return SDValue();
12006 // Generate new load node.
12007 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12008 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12009 NewLoad[i] = DAG.getMemIntrinsicNode(
12010 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12011 LD->getMemoryVT(), LD->getMemOperand());
12012 }
12013 SDValue NewOp =
12014 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12015 NewLoad[1], Op0.getNode()->getFlags());
12016 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12017 DAG.getConstant(0, dl, MVT::i32));
12018 }
12019 case ISD::LOAD: {
12020 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12021 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12022 SDValue NewLd = DAG.getMemIntrinsicNode(
12023 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12024 LD->getMemoryVT(), LD->getMemOperand());
12025 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12026 DAG.getConstant(0, dl, MVT::i32));
12027 }
12028 }
12029 llvm_unreachable("ERROR: Should return for all cases within switch.");
12030}
12031
12032SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
12033 // Default to target independent lowering if there is a logical user of the
12034 // carry-bit.
12035 for (SDNode *U : Op->users()) {
12036 if (U->getOpcode() == ISD::SELECT)
12037 return SDValue();
12038 if (ISD::isBitwiseLogicOp(U->getOpcode())) {
12039 for (unsigned i = 0, ie = U->getNumOperands(); i != ie; ++i) {
12040 if (U->getOperand(i).getOpcode() != ISD::UADDO &&
12041 U->getOperand(i).getOpcode() != ISD::MERGE_VALUES)
12042 return SDValue();
12043 }
12044 }
12045 }
12046 SDValue LHS = Op.getOperand(0);
12047 SDValue RHS = Op.getOperand(1);
12048 SDLoc dl(Op);
12049
12050 // Default to target independent lowering for special cases handled there.
12051 if (isOneConstant(RHS) || isAllOnesConstant(RHS))
12052 return SDValue();
12053
12054 EVT VT = Op.getNode()->getValueType(0);
12055
12056 SDValue ADDC;
12057 SDValue Overflow;
12058 SDVTList VTs = Op.getNode()->getVTList();
12059
12060 ADDC = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), LHS, RHS);
12061 Overflow = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(VT, MVT::Glue),
12062 DAG.getConstant(0, dl, VT), DAG.getConstant(0, dl, VT),
12063 ADDC.getValue(1));
12064 SDValue OverflowTrunc =
12065 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12066 SDValue Res =
12067 DAG.getNode(ISD::MERGE_VALUES, dl, VTs, ADDC.getValue(0), OverflowTrunc);
12068 return Res;
12069}
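// A short sketch of the carry extraction above: ADDC computes LHS + RHS and
// exposes the carry through glue, and ADDE of 0 + 0 + carry materializes that
// carry as a 0/1 value, which is then truncated to the overflow result type
// expected of UADDO.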
12070
12071SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12072
12073 SDLoc dl(Op);
12074 SDValue LHS = Op.getOperand(0);
12075 SDValue RHS = Op.getOperand(1);
12076 EVT VT = Op.getNode()->getValueType(0);
12077
12078 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12079
12080 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12081 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12082
12083 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12084
12085 SDValue Overflow =
12086 DAG.getNode(ISD::SRL, dl, VT, And,
12087 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12088
12089 SDValue OverflowTrunc =
12090 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12091
12092 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12093}
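// The overflow computation above is the standard sign-bit identity for signed
// subtraction: LHS - RHS overflows exactly when LHS and RHS have different
// signs and the result's sign differs from the sign of LHS, i.e. when the sign
// bit of (LHS ^ RHS) & ((LHS - RHS) ^ LHS) is set; the SRL by (bit width - 1)
// moves that bit into position 0.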
12094
12095/// LowerOperation - Provide custom lowering hooks for some operations.
12096///
12097 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
12098 switch (Op.getOpcode()) {
12099 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
12100 case ISD::UADDO: return LowerUaddo(Op, DAG);
12101 case ISD::FPOW: return lowerPow(Op, DAG);
12102 case ISD::FSIN: return lowerSin(Op, DAG);
12103 case ISD::FCOS: return lowerCos(Op, DAG);
12104 case ISD::FLOG: return lowerLog(Op, DAG);
12105 case ISD::FLOG10: return lowerLog10(Op, DAG);
12106 case ISD::FEXP: return lowerExp(Op, DAG);
12107 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12108 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12109 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12110 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12111 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12112 case ISD::STRICT_FSETCC:
12113 case ISD::STRICT_FSETCCS:
12114 case ISD::SETCC: return LowerSETCC(Op, DAG);
12115 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12116 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12117 case ISD::SSUBO:
12118 return LowerSSUBO(Op, DAG);
12119
12120 case ISD::INLINEASM:
12121 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12122 // Variable argument lowering.
12123 case ISD::VASTART: return LowerVASTART(Op, DAG);
12124 case ISD::VAARG: return LowerVAARG(Op, DAG);
12125 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12126
12127 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12128 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12130 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12131
12132 // Exception handling lowering.
12133 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12134 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12135 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12136
12137 case ISD::LOAD: return LowerLOAD(Op, DAG);
12138 case ISD::STORE: return LowerSTORE(Op, DAG);
12139 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12140 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12141 case ISD::STRICT_FP_TO_UINT:
12142 case ISD::STRICT_FP_TO_SINT:
12143 case ISD::FP_TO_UINT:
12144 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12145 case ISD::STRICT_UINT_TO_FP:
12146 case ISD::STRICT_SINT_TO_FP:
12147 case ISD::UINT_TO_FP:
12148 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12149 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12150 case ISD::SET_ROUNDING:
12151 return LowerSET_ROUNDING(Op, DAG);
12152
12153 // Lower 64-bit shifts.
12154 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12155 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12156 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12157
12158 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12159 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12160
12161 // Vector-related lowering.
12162 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12163 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12164 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12165 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12166 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12167 case ISD::MUL: return LowerMUL(Op, DAG);
12168 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12169 case ISD::STRICT_FP_ROUND:
12170 case ISD::FP_ROUND:
12171 return LowerFP_ROUND(Op, DAG);
12172 case ISD::ROTL: return LowerROTL(Op, DAG);
12173
12174 // For counter-based loop handling.
12175 case ISD::INTRINSIC_W_CHAIN: return SDValue();
12176
12177 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12178
12179 // Frame & Return address.
12180 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12181 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12182
12183 case ISD::INTRINSIC_VOID:
12184 return LowerINTRINSIC_VOID(Op, DAG);
12185 case ISD::BSWAP:
12186 return LowerBSWAP(Op, DAG);
12187 case ISD::ATOMIC_CMP_SWAP:
12188 return LowerATOMIC_CMP_SWAP(Op, DAG);
12189 case ISD::ATOMIC_STORE:
12190 return LowerATOMIC_LOAD_STORE(Op, DAG);
12191 case ISD::IS_FPCLASS:
12192 return LowerIS_FPCLASS(Op, DAG);
12193 }
12194}
12195
12196 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12197 SmallVectorImpl<SDValue> &Results,
12198 SelectionDAG &DAG) const {
12199 SDLoc dl(N);
12200 switch (N->getOpcode()) {
12201 default:
12202 llvm_unreachable("Do not know how to custom type legalize this operation!");
12203 case ISD::ATOMIC_LOAD: {
12204 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12205 Results.push_back(Res);
12206 Results.push_back(Res.getValue(1));
12207 break;
12208 }
12209 case ISD::READCYCLECOUNTER: {
12210 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12211 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12212
12213 Results.push_back(
12214 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12215 Results.push_back(RTB.getValue(2));
12216 break;
12217 }
12218 case ISD::INTRINSIC_W_CHAIN: {
12219 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12220 break;
12221
12222 assert(N->getValueType(0) == MVT::i1 &&
12223 "Unexpected result type for CTR decrement intrinsic");
12224 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12225 N->getValueType(0));
12226 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12227 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12228 N->getOperand(1));
12229
12230 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12231 Results.push_back(NewInt.getValue(1));
12232 break;
12233 }
12234 case ISD::INTRINSIC_WO_CHAIN: {
12235 switch (N->getConstantOperandVal(0)) {
12236 case Intrinsic::ppc_pack_longdouble:
12237 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12238 N->getOperand(2), N->getOperand(1)));
12239 break;
12240 case Intrinsic::ppc_maxfe:
12241 case Intrinsic::ppc_minfe:
12242 case Intrinsic::ppc_fnmsub:
12243 case Intrinsic::ppc_convert_f128_to_ppcf128:
12244 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12245 break;
12246 }
12247 break;
12248 }
12249 case ISD::VAARG: {
12250 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12251 return;
12252
12253 EVT VT = N->getValueType(0);
12254
12255 if (VT == MVT::i64) {
12256 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12257
12258 Results.push_back(NewNode);
12259 Results.push_back(NewNode.getValue(1));
12260 }
12261 return;
12262 }
12263 case ISD::STRICT_FP_TO_SINT:
12264 case ISD::STRICT_FP_TO_UINT:
12265 case ISD::FP_TO_SINT:
12266 case ISD::FP_TO_UINT: {
12267 // LowerFP_TO_INT() can only handle f32 and f64.
12268 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12269 MVT::ppcf128)
12270 return;
12271 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12272 Results.push_back(LoweredValue);
12273 if (N->isStrictFPOpcode())
12274 Results.push_back(LoweredValue.getValue(1));
12275 return;
12276 }
12277 case ISD::TRUNCATE: {
12278 if (!N->getValueType(0).isVector())
12279 return;
12280 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12281 if (Lowered)
12282 Results.push_back(Lowered);
12283 return;
12284 }
12285 case ISD::SCALAR_TO_VECTOR: {
12286 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12287 if (Lowered)
12288 Results.push_back(Lowered);
12289 return;
12290 }
12291 case ISD::FSHL:
12292 case ISD::FSHR:
12293 // Don't handle funnel shifts here.
12294 return;
12295 case ISD::BITCAST:
12296 // Don't handle bitcast here.
12297 return;
12298 case ISD::FP_EXTEND:
12299 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12300 if (Lowered)
12301 Results.push_back(Lowered);
12302 return;
12303 }
12304}
12305
12306//===----------------------------------------------------------------------===//
12307// Other Lowering Code
12308//===----------------------------------------------------------------------===//
12309
12310 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12311 return Builder.CreateIntrinsic(Id, {}, {});
12312}
12313
12314 // The mappings for emitLeading/TrailingFence are taken from
12315// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12316 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12317 Instruction *Inst,
12318 AtomicOrdering Ord) const {
12319 if (Ord == AtomicOrdering::SequentiallyConsistent)
12320 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12321 if (isReleaseOrStronger(Ord))
12322 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12323 return nullptr;
12324}
12325
12326 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12327 Instruction *Inst,
12328 AtomicOrdering Ord) const {
12329 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12330 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12331 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12332 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12333 if (isa<LoadInst>(Inst))
12334 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
12335 {Inst});
12336 // FIXME: Can use isync for rmw operation.
12337 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12338 }
12339 return nullptr;
12340}
12341
12342 MachineBasicBlock *PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI,
12343 MachineBasicBlock *BB,
12344 unsigned AtomicSize,
12345 unsigned BinOpcode,
12346 unsigned CmpOpcode,
12347 unsigned CmpPred) const {
12348 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12349 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12350
12351 auto LoadMnemonic = PPC::LDARX;
12352 auto StoreMnemonic = PPC::STDCX;
12353 switch (AtomicSize) {
12354 default:
12355 llvm_unreachable("Unexpected size of atomic entity");
12356 case 1:
12357 LoadMnemonic = PPC::LBARX;
12358 StoreMnemonic = PPC::STBCX;
12359 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12360 break;
12361 case 2:
12362 LoadMnemonic = PPC::LHARX;
12363 StoreMnemonic = PPC::STHCX;
12364 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12365 break;
12366 case 4:
12367 LoadMnemonic = PPC::LWARX;
12368 StoreMnemonic = PPC::STWCX;
12369 break;
12370 case 8:
12371 LoadMnemonic = PPC::LDARX;
12372 StoreMnemonic = PPC::STDCX;
12373 break;
12374 }
12375
12376 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12377 MachineFunction *F = BB->getParent();
12378 MachineFunction::iterator It = ++BB->getIterator();
12379
12380 Register dest = MI.getOperand(0).getReg();
12381 Register ptrA = MI.getOperand(1).getReg();
12382 Register ptrB = MI.getOperand(2).getReg();
12383 Register incr = MI.getOperand(3).getReg();
12384 DebugLoc dl = MI.getDebugLoc();
12385
12386 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12387 MachineBasicBlock *loop2MBB =
12388 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12389 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12390 F->insert(It, loopMBB);
12391 if (CmpOpcode)
12392 F->insert(It, loop2MBB);
12393 F->insert(It, exitMBB);
12394 exitMBB->splice(exitMBB->begin(), BB,
12395 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12396 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12397
12398 MachineRegisterInfo &RegInfo = F->getRegInfo();
12399 Register TmpReg = (!BinOpcode) ? incr :
12400 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12401 : &PPC::GPRCRegClass);
12402
12403 // thisMBB:
12404 // ...
12405 // fallthrough --> loopMBB
12406 BB->addSuccessor(loopMBB);
12407
12408 // loopMBB:
12409 // l[wd]arx dest, ptr
12410 // add r0, dest, incr
12411 // st[wd]cx. r0, ptr
12412 // bne- loopMBB
12413 // fallthrough --> exitMBB
12414
12415 // For max/min...
12416 // loopMBB:
12417 // l[wd]arx dest, ptr
12418 // cmpl?[wd] dest, incr
12419 // bgt exitMBB
12420 // loop2MBB:
12421 // st[wd]cx. dest, ptr
12422 // bne- loopMBB
12423 // fallthrough --> exitMBB
12424
12425 BB = loopMBB;
12426 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12427 .addReg(ptrA).addReg(ptrB);
12428 if (BinOpcode)
12429 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12430 if (CmpOpcode) {
12431 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12432 // Signed comparisons of byte or halfword values must be sign-extended.
12433 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12434 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12435 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12436 ExtReg).addReg(dest);
12437 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12438 } else
12439 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12440
12441 BuildMI(BB, dl, TII->get(PPC::BCC))
12442 .addImm(CmpPred)
12443 .addReg(CrReg)
12444 .addMBB(exitMBB);
12445 BB->addSuccessor(loop2MBB);
12446 BB->addSuccessor(exitMBB);
12447 BB = loop2MBB;
12448 }
12449 BuildMI(BB, dl, TII->get(StoreMnemonic))
12450 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12451 BuildMI(BB, dl, TII->get(PPC::BCC))
12452 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12453 BB->addSuccessor(loopMBB);
12454 BB->addSuccessor(exitMBB);
12455
12456 // exitMBB:
12457 // ...
12458 BB = exitMBB;
12459 return BB;
12460}
12461
12462 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12463 switch(MI.getOpcode()) {
12464 default:
12465 return false;
12466 case PPC::COPY:
12467 return TII->isSignExtended(MI.getOperand(1).getReg(),
12468 &MI.getMF()->getRegInfo());
12469 case PPC::LHA:
12470 case PPC::LHA8:
12471 case PPC::LHAU:
12472 case PPC::LHAU8:
12473 case PPC::LHAUX:
12474 case PPC::LHAUX8:
12475 case PPC::LHAX:
12476 case PPC::LHAX8:
12477 case PPC::LWA:
12478 case PPC::LWAUX:
12479 case PPC::LWAX:
12480 case PPC::LWAX_32:
12481 case PPC::LWA_32:
12482 case PPC::PLHA:
12483 case PPC::PLHA8:
12484 case PPC::PLHA8pc:
12485 case PPC::PLHApc:
12486 case PPC::PLWA:
12487 case PPC::PLWA8:
12488 case PPC::PLWA8pc:
12489 case PPC::PLWApc:
12490 case PPC::EXTSB:
12491 case PPC::EXTSB8:
12492 case PPC::EXTSB8_32_64:
12493 case PPC::EXTSB8_rec:
12494 case PPC::EXTSB_rec:
12495 case PPC::EXTSH:
12496 case PPC::EXTSH8:
12497 case PPC::EXTSH8_32_64:
12498 case PPC::EXTSH8_rec:
12499 case PPC::EXTSH_rec:
12500 case PPC::EXTSW:
12501 case PPC::EXTSWSLI:
12502 case PPC::EXTSWSLI_32_64:
12503 case PPC::EXTSWSLI_32_64_rec:
12504 case PPC::EXTSWSLI_rec:
12505 case PPC::EXTSW_32:
12506 case PPC::EXTSW_32_64:
12507 case PPC::EXTSW_32_64_rec:
12508 case PPC::EXTSW_rec:
12509 case PPC::SRAW:
12510 case PPC::SRAWI:
12511 case PPC::SRAWI_rec:
12512 case PPC::SRAW_rec:
12513 return true;
12514 }
12515 return false;
12516}
12517
12518 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12519 MachineInstr &MI, MachineBasicBlock *BB,
12520 bool is8bit, // operation
12521 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12522 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12523 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12524
12525 // If this is a signed comparison and the value being compared is not known
12526 // to be sign extended, sign extend it here.
12527 DebugLoc dl = MI.getDebugLoc();
12528 MachineFunction *F = BB->getParent();
12529 MachineRegisterInfo &RegInfo = F->getRegInfo();
12530 Register incr = MI.getOperand(3).getReg();
12531 bool IsSignExtended =
12532 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12533
12534 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12535 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12536 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12537 .addReg(MI.getOperand(3).getReg());
12538 MI.getOperand(3).setReg(ValueReg);
12539 incr = ValueReg;
12540 }
12541 // If we support part-word atomic mnemonics, just use them
12542 if (Subtarget.hasPartwordAtomics())
12543 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12544 CmpPred);
12545
12546 // In 64 bit mode we have to use 64 bits for addresses, even though the
12547 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
12548 // registers without caring whether they're 32 or 64, but here we're
12549 // doing actual arithmetic on the addresses.
12550 bool is64bit = Subtarget.isPPC64();
12551 bool isLittleEndian = Subtarget.isLittleEndian();
12552 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12553
12554 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12555 MachineFunction::iterator It = ++BB->getIterator();
12556
12557 Register dest = MI.getOperand(0).getReg();
12558 Register ptrA = MI.getOperand(1).getReg();
12559 Register ptrB = MI.getOperand(2).getReg();
12560
12561 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12562 MachineBasicBlock *loop2MBB =
12563 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12564 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12565 F->insert(It, loopMBB);
12566 if (CmpOpcode)
12567 F->insert(It, loop2MBB);
12568 F->insert(It, exitMBB);
12569 exitMBB->splice(exitMBB->begin(), BB,
12570 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12571 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12572
12573 const TargetRegisterClass *RC =
12574 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12575 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12576
12577 Register PtrReg = RegInfo.createVirtualRegister(RC);
12578 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12579 Register ShiftReg =
12580 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12581 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12582 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12583 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12584 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12585 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12586 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12587 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12588 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12589 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12590 Register Ptr1Reg;
12591 Register TmpReg =
12592 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12593
12594 // thisMBB:
12595 // ...
12596 // fallthrough --> loopMBB
12597 BB->addSuccessor(loopMBB);
12598
12599 // The 4-byte load must be aligned, while a char or short may be
12600 // anywhere in the word. Hence all this nasty bookkeeping code.
12601 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12602 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12603 // xori shift, shift1, 24 [16]
12604 // rlwinm ptr, ptr1, 0, 0, 29
12605 // slw incr2, incr, shift
12606 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12607 // slw mask, mask2, shift
12608 // loopMBB:
12609 // lwarx tmpDest, ptr
12610 // add tmp, tmpDest, incr2
12611 // andc tmp2, tmpDest, mask
12612 // and tmp3, tmp, mask
12613 // or tmp4, tmp3, tmp2
12614 // stwcx. tmp4, ptr
12615 // bne- loopMBB
12616 // fallthrough --> exitMBB
12617 // srw SrwDest, tmpDest, shift
12618 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
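// Editor's note -- an illustrative worked example, not part of the original
// source: assuming a big-endian target and an 8-bit operand at byte offset 2
// of its aligned word, "rlwinm shift1, ptr1, 3, 27, 28" yields 16 and
// "xori shift, shift1, 24" yields 8, so incr and the 0xFF mask are both
// shifted left by 8 and the byte occupies bits 15:8 of the word loaded by
// lwarx (mask = 0x0000FF00).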
12619 if (ptrA != ZeroReg) {
12620 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12621 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12622 .addReg(ptrA)
12623 .addReg(ptrB);
12624 } else {
12625 Ptr1Reg = ptrB;
12626 }
12627 // We need to use the 32-bit subregister to avoid a register class mismatch
12628 // in 64-bit mode.
12629 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12630 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12631 .addImm(3)
12632 .addImm(27)
12633 .addImm(is8bit ? 28 : 27);
12634 if (!isLittleEndian)
12635 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12636 .addReg(Shift1Reg)
12637 .addImm(is8bit ? 24 : 16);
12638 if (is64bit)
12639 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12640 .addReg(Ptr1Reg)
12641 .addImm(0)
12642 .addImm(61);
12643 else
12644 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12645 .addReg(Ptr1Reg)
12646 .addImm(0)
12647 .addImm(0)
12648 .addImm(29);
12649 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12650 if (is8bit)
12651 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12652 else {
12653 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12654 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12655 .addReg(Mask3Reg)
12656 .addImm(65535);
12657 }
12658 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12659 .addReg(Mask2Reg)
12660 .addReg(ShiftReg);
12661
12662 BB = loopMBB;
12663 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12664 .addReg(ZeroReg)
12665 .addReg(PtrReg);
12666 if (BinOpcode)
12667 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12668 .addReg(Incr2Reg)
12669 .addReg(TmpDestReg);
12670 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12671 .addReg(TmpDestReg)
12672 .addReg(MaskReg);
12673 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12674 if (CmpOpcode) {
12675 // For unsigned comparisons, we can directly compare the shifted values.
12676 // For signed comparisons we shift and sign extend.
12677 Register SReg = RegInfo.createVirtualRegister(GPRC);
12678 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12679 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12680 .addReg(TmpDestReg)
12681 .addReg(MaskReg);
12682 unsigned ValueReg = SReg;
12683 unsigned CmpReg = Incr2Reg;
12684 if (CmpOpcode == PPC::CMPW) {
12685 ValueReg = RegInfo.createVirtualRegister(GPRC);
12686 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12687 .addReg(SReg)
12688 .addReg(ShiftReg);
12689 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12690 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12691 .addReg(ValueReg);
12692 ValueReg = ValueSReg;
12693 CmpReg = incr;
12694 }
12695 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12696 BuildMI(BB, dl, TII->get(PPC::BCC))
12697 .addImm(CmpPred)
12698 .addReg(CrReg)
12699 .addMBB(exitMBB);
12700 BB->addSuccessor(loop2MBB);
12701 BB->addSuccessor(exitMBB);
12702 BB = loop2MBB;
12703 }
12704 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12705 BuildMI(BB, dl, TII->get(PPC::STWCX))
12706 .addReg(Tmp4Reg)
12707 .addReg(ZeroReg)
12708 .addReg(PtrReg);
12709 BuildMI(BB, dl, TII->get(PPC::BCC))
12710 .addImm(PPC::PRED_NE)
12711 .addReg(PPC::CR0)
12712 .addMBB(loopMBB);
12713 BB->addSuccessor(loopMBB);
12714 BB->addSuccessor(exitMBB);
12715
12716 // exitMBB:
12717 // ...
12718 BB = exitMBB;
12719 // Since the shift amount is not a constant, we need to clear
12720 // the upper bits with a separate RLWINM.
12721 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12722 .addReg(SrwDestReg)
12723 .addImm(0)
12724 .addImm(is8bit ? 24 : 16)
12725 .addImm(31);
12726 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12727 .addReg(TmpDestReg)
12728 .addReg(ShiftReg);
12729 return BB;
12730}
12731
12732 MachineBasicBlock *
12733 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12734 MachineBasicBlock *MBB) const {
12735 DebugLoc DL = MI.getDebugLoc();
12736 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12737 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12738
12739 MachineFunction *MF = MBB->getParent();
12740 MachineRegisterInfo &MRI = MF->getRegInfo();
12741
12742 const BasicBlock *BB = MBB->getBasicBlock();
12743 MachineFunction::iterator I = ++MBB->getIterator();
12744
12745 Register DstReg = MI.getOperand(0).getReg();
12746 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12747 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12748 Register mainDstReg = MRI.createVirtualRegister(RC);
12749 Register restoreDstReg = MRI.createVirtualRegister(RC);
12750
12751 MVT PVT = getPointerTy(MF->getDataLayout());
12752 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12753 "Invalid Pointer Size!");
12754 // For v = setjmp(buf), we generate
12755 //
12756 // thisMBB:
12757 // SjLjSetup mainMBB
12758 // bl mainMBB
12759 // v_restore = 1
12760 // b sinkMBB
12761 //
12762 // mainMBB:
12763 // buf[LabelOffset] = LR
12764 // v_main = 0
12765 //
12766 // sinkMBB:
12767 // v = phi(main, restore)
12768 //
12769
12770 MachineBasicBlock *thisMBB = MBB;
12771 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12772 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12773 MF->insert(I, mainMBB);
12774 MF->insert(I, sinkMBB);
12775
12776 MachineInstrBuilder MIB;
12777
12778 // Transfer the remainder of BB and its successor edges to sinkMBB.
12779 sinkMBB->splice(sinkMBB->begin(), MBB,
12780 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12781 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12782
12783 // Note that the structure of the jmp_buf used here is not compatible
12784 // with that used by libc, and is not designed to be. Specifically, it
12785 // stores only those 'reserved' registers that LLVM does not otherwise
12786 // understand how to spill. Also, by convention, by the time this
12787 // intrinsic is called, Clang has already stored the frame address in the
12788 // first slot of the buffer and the stack address in the third. Following the
12789 // X86 target code, we'll store the jump address in the second slot. We also
12790 // need to save the TOC pointer (R2) to handle jumps between shared
12791 // libraries, and that will be stored in the fourth slot. The thread
12792 // identifier (R13) is not affected.
12793
12794 // thisMBB:
12795 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12796 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12797 const int64_t BPOffset = 4 * PVT.getStoreSize();
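// Editor's note -- an illustrative summary, not part of the original source:
// on a 64-bit target PVT.getStoreSize() is 8, so the jmp_buf layout assumed
// here is buf[0] = frame address (stored by the front end), buf[8] = jump
// address (LR), buf[16] = stack pointer, buf[24] = TOC pointer (r2) and
// buf[32] = base pointer.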
12798
12799 // Prepare the IP (jump address) in a register.
12800 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12801 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12802 Register BufReg = MI.getOperand(1).getReg();
12803
12804 if (Subtarget.is64BitELFABI()) {
12805 setUsesTOCBasePtr(*MBB->getParent());
12806 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12807 .addReg(PPC::X2)
12808 .addImm(TOCOffset)
12809 .addReg(BufReg)
12810 .cloneMemRefs(MI);
12811 }
12812
12813 // Naked functions never have a base pointer, and so we use r1. For all
12814 // other functions, this decision must be delayed until during PEI.
12815 unsigned BaseReg;
12816 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12817 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12818 else
12819 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12820
12821 MIB = BuildMI(*thisMBB, MI, DL,
12822 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12823 .addReg(BaseReg)
12824 .addImm(BPOffset)
12825 .addReg(BufReg)
12826 .cloneMemRefs(MI);
12827
12828 // Setup
12829 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12830 MIB.addRegMask(TRI->getNoPreservedMask());
12831
12832 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12833
12834 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12835 .addMBB(mainMBB);
12836 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12837
12838 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12839 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12840
12841 // mainMBB:
12842 // mainDstReg = 0
12843 MIB =
12844 BuildMI(mainMBB, DL,
12845 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12846
12847 // Store IP
12848 if (Subtarget.isPPC64()) {
12849 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12850 .addReg(LabelReg)
12851 .addImm(LabelOffset)
12852 .addReg(BufReg);
12853 } else {
12854 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12855 .addReg(LabelReg)
12856 .addImm(LabelOffset)
12857 .addReg(BufReg);
12858 }
12859 MIB.cloneMemRefs(MI);
12860
12861 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12862 mainMBB->addSuccessor(sinkMBB);
12863
12864 // sinkMBB:
12865 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12866 TII->get(PPC::PHI), DstReg)
12867 .addReg(mainDstReg).addMBB(mainMBB)
12868 .addReg(restoreDstReg).addMBB(thisMBB);
12869
12870 MI.eraseFromParent();
12871 return sinkMBB;
12872}
12873
12874 MachineBasicBlock *
12875 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12876 MachineBasicBlock *MBB) const {
12877 DebugLoc DL = MI.getDebugLoc();
12878 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12879
12880 MachineFunction *MF = MBB->getParent();
12881 MachineRegisterInfo &MRI = MF->getRegInfo();
12882
12883 MVT PVT = getPointerTy(MF->getDataLayout());
12884 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12885 "Invalid Pointer Size!");
12886
12887 const TargetRegisterClass *RC =
12888 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12889 Register Tmp = MRI.createVirtualRegister(RC);
12890 // Since FP is only updated here but NOT referenced, it's treated as GPR.
12891 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12892 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12893 unsigned BP =
12894 (PVT == MVT::i64)
12895 ? PPC::X30
12896 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12897 : PPC::R30);
12898
12899 MachineInstrBuilder MIB;
12900
12901 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12902 const int64_t SPOffset = 2 * PVT.getStoreSize();
12903 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12904 const int64_t BPOffset = 4 * PVT.getStoreSize();
12905
12906 Register BufReg = MI.getOperand(0).getReg();
12907
12908 // Reload FP (the jumped-to function may not have had a
12909 // frame pointer, and if so, then its r31 will be restored
12910 // as necessary).
12911 if (PVT == MVT::i64) {
12912 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12913 .addImm(0)
12914 .addReg(BufReg);
12915 } else {
12916 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12917 .addImm(0)
12918 .addReg(BufReg);
12919 }
12920 MIB.cloneMemRefs(MI);
12921
12922 // Reload IP
12923 if (PVT == MVT::i64) {
12924 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12925 .addImm(LabelOffset)
12926 .addReg(BufReg);
12927 } else {
12928 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12929 .addImm(LabelOffset)
12930 .addReg(BufReg);
12931 }
12932 MIB.cloneMemRefs(MI);
12933
12934 // Reload SP
12935 if (PVT == MVT::i64) {
12936 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12937 .addImm(SPOffset)
12938 .addReg(BufReg);
12939 } else {
12940 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12941 .addImm(SPOffset)
12942 .addReg(BufReg);
12943 }
12944 MIB.cloneMemRefs(MI);
12945
12946 // Reload BP
12947 if (PVT == MVT::i64) {
12948 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12949 .addImm(BPOffset)
12950 .addReg(BufReg);
12951 } else {
12952 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12953 .addImm(BPOffset)
12954 .addReg(BufReg);
12955 }
12956 MIB.cloneMemRefs(MI);
12957
12958 // Reload TOC
12959 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12960 setUsesTOCBasePtr(*MBB->getParent());
12961 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12962 .addImm(TOCOffset)
12963 .addReg(BufReg)
12964 .cloneMemRefs(MI);
12965 }
12966
12967 // Jump
12968 BuildMI(*MBB, MI, DL,
12969 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12970 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12971
12972 MI.eraseFromParent();
12973 return MBB;
12974}
12975
12976 bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12977 // If the function specifically requests inline stack probes, emit them.
12978 if (MF.getFunction().hasFnAttribute("probe-stack"))
12979 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12980 "inline-asm";
12981 return false;
12982}
12983
12984 unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12985 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12986 unsigned StackAlign = TFI->getStackAlignment();
12987 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12988 "Unexpected stack alignment");
12989 // The default stack probe size is 4096 if the function has no
12990 // stack-probe-size attribute.
12991 const Function &Fn = MF.getFunction();
12992 unsigned StackProbeSize =
12993 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12994 // Round down to the stack alignment.
12995 StackProbeSize &= ~(StackAlign - 1);
12996 return StackProbeSize ? StackProbeSize : StackAlign;
12997}
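// Editor's note -- an illustrative example, not part of the original source:
// with a 16-byte stack alignment, a "stack-probe-size"="100" attribute is
// rounded down to 96; a value smaller than the alignment (e.g. 8) rounds to 0
// and the final ternary falls back to the alignment itself.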
12998
12999 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
13000 // into three phases. In the first phase, it uses the pseudo instruction
13001 // PREPARE_PROBED_ALLOCA to get the future values of the actual FramePointer
13002 // and FinalStackPtr. In the second phase, it generates a loop that probes blocks.
13003 // Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future value
13004 // of MaxCallFrameSize so that it can calculate the correct data area pointer.
13005 MachineBasicBlock *
13006 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13007 MachineBasicBlock *MBB) const {
13008 const bool isPPC64 = Subtarget.isPPC64();
13009 MachineFunction *MF = MBB->getParent();
13010 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13011 DebugLoc DL = MI.getDebugLoc();
13012 const unsigned ProbeSize = getStackProbeSize(*MF);
13013 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13014 MachineRegisterInfo &MRI = MF->getRegInfo();
13015 // The CFG of the stack probing code looks as follows:
13016 // +-----+
13017 // | MBB |
13018 // +--+--+
13019 // |
13020 // +----v----+
13021 // +--->+ TestMBB +---+
13022 // | +----+----+ |
13023 // | | |
13024 // | +-----v----+ |
13025 // +---+ BlockMBB | |
13026 // +----------+ |
13027 // |
13028 // +---------+ |
13029 // | TailMBB +<--+
13030 // +---------+
13031 // In MBB, calculate previous frame pointer and final stack pointer.
13032 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13033 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13034 // TailMBB is spliced via \p MI.
13035 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13036 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13037 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13038
13039 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13040 MF->insert(MBBIter, TestMBB);
13041 MF->insert(MBBIter, BlockMBB);
13042 MF->insert(MBBIter, TailMBB);
13043
13044 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13045 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13046
13047 Register DstReg = MI.getOperand(0).getReg();
13048 Register NegSizeReg = MI.getOperand(1).getReg();
13049 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13050 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13051 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13052 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13053
13054 // Since the value of NegSizeReg might be realigned during prologue/epilogue
13055 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13056 // actual FramePointer and NegSize.
13057 unsigned ProbeOpc;
13058 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13059 ProbeOpc =
13060 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13061 else
13062 // By using the PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG form, ActualNegSizeReg
13063 // and NegSizeReg will be allocated to the same physical register, avoiding a
13064 // redundant copy when the only use of NegSizeReg is the current MI, which
13065 // will then be replaced by PREPARE_PROBED_ALLOCA.
13066 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13067 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13068 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13069 .addDef(ActualNegSizeReg)
13070 .addReg(NegSizeReg)
13071 .add(MI.getOperand(2))
13072 .add(MI.getOperand(3));
13073
13074 // Calculate the final stack pointer, which equals SP + ActualNegSize.
13075 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13076 FinalStackPtr)
13077 .addReg(SPReg)
13078 .addReg(ActualNegSizeReg);
13079
13080 // Materialize a scratch register for update.
13081 int64_t NegProbeSize = -(int64_t)ProbeSize;
13082 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13083 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13084 if (!isInt<16>(NegProbeSize)) {
13085 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13086 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13087 .addImm(NegProbeSize >> 16);
13088 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13089 ScratchReg)
13090 .addReg(TempReg)
13091 .addImm(NegProbeSize & 0xFFFF);
13092 } else
13093 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13094 .addImm(NegProbeSize);
13095
13096 {
13097 // Probing leading residual part.
13098 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13099 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13100 .addReg(ActualNegSizeReg)
13101 .addReg(ScratchReg);
13102 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13103 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13104 .addReg(Div)
13105 .addReg(ScratchReg);
13106 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13107 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13108 .addReg(Mul)
13109 .addReg(ActualNegSizeReg);
13110 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13111 .addReg(FramePointer)
13112 .addReg(SPReg)
13113 .addReg(NegMod);
13114 }
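  // Editor's note -- an illustrative example, not part of the original
  // source: assuming ProbeSize is 4096 and ActualNegSize is -10000, the
  // sequence above computes Div = 2, Mul = -8192 and NegMod = -1808, so the
  // stdux/stwux first probes the 1808-byte residual; BlockMBB then probes the
  // remaining two full 4096-byte blocks until SP reaches FinalStackPtr.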
13115
13116 {
13117 // Remaining part should be multiple of ProbeSize.
13118 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13119 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13120 .addReg(SPReg)
13121 .addReg(FinalStackPtr);
13122 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13123 .addImm(PPC::PRED_EQ)
13124 .addReg(CmpResult)
13125 .addMBB(TailMBB);
13126 TestMBB->addSuccessor(BlockMBB);
13127 TestMBB->addSuccessor(TailMBB);
13128 }
13129
13130 {
13131 // Touch the block.
13132 // |P...|P...|P...
13133 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13134 .addReg(FramePointer)
13135 .addReg(SPReg)
13136 .addReg(ScratchReg);
13137 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13138 BlockMBB->addSuccessor(TestMBB);
13139 }
13140
13141 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion;
13142 // use the DYNAREAOFFSET pseudo instruction to get the future result.
13143 Register MaxCallFrameSizeReg =
13144 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13145 BuildMI(TailMBB, DL,
13146 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13147 MaxCallFrameSizeReg)
13148 .add(MI.getOperand(2))
13149 .add(MI.getOperand(3));
13150 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13151 .addReg(SPReg)
13152 .addReg(MaxCallFrameSizeReg);
13153
13154 // Splice instructions after MI to TailMBB.
13155 TailMBB->splice(TailMBB->end(), MBB,
13156 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13157 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
13158 MBB->addSuccessor(TestMBB);
13159
13160 // Delete the pseudo instruction.
13161 MI.eraseFromParent();
13162
13163 ++NumDynamicAllocaProbed;
13164 return TailMBB;
13165}
13166
13167 static bool IsSelectCC(MachineInstr &MI) {
13168 switch (MI.getOpcode()) {
13169 case PPC::SELECT_CC_I4:
13170 case PPC::SELECT_CC_I8:
13171 case PPC::SELECT_CC_F4:
13172 case PPC::SELECT_CC_F8:
13173 case PPC::SELECT_CC_F16:
13174 case PPC::SELECT_CC_VRRC:
13175 case PPC::SELECT_CC_VSFRC:
13176 case PPC::SELECT_CC_VSSRC:
13177 case PPC::SELECT_CC_VSRC:
13178 case PPC::SELECT_CC_SPE4:
13179 case PPC::SELECT_CC_SPE:
13180 return true;
13181 default:
13182 return false;
13183 }
13184}
13185
13186static bool IsSelect(MachineInstr &MI) {
13187 switch (MI.getOpcode()) {
13188 case PPC::SELECT_I4:
13189 case PPC::SELECT_I8:
13190 case PPC::SELECT_F4:
13191 case PPC::SELECT_F8:
13192 case PPC::SELECT_F16:
13193 case PPC::SELECT_SPE:
13194 case PPC::SELECT_SPE4:
13195 case PPC::SELECT_VRRC:
13196 case PPC::SELECT_VSFRC:
13197 case PPC::SELECT_VSSRC:
13198 case PPC::SELECT_VSRC:
13199 return true;
13200 default:
13201 return false;
13202 }
13203}
13204
13205 MachineBasicBlock *
13206 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13207 MachineBasicBlock *BB) const {
13208 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13209 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13210 if (Subtarget.is64BitELFABI() &&
13211 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13212 !Subtarget.isUsingPCRelativeCalls()) {
13213 // Call lowering should have added an r2 operand to indicate a dependence
13214 // on the TOC base pointer value. It can't, however, because there is no
13215 // way to mark the dependence as implicit there, and so the stackmap code
13216 // will confuse it with a regular operand. Instead, add the dependence
13217 // here.
13218 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13219 }
13220
13221 return emitPatchPoint(MI, BB);
13222 }
13223
13224 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13225 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13226 return emitEHSjLjSetJmp(MI, BB);
13227 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13228 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13229 return emitEHSjLjLongJmp(MI, BB);
13230 }
13231
13232 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13233
13234 // To "insert" these instructions we actually have to insert their
13235 // control-flow patterns.
13236 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13237 MachineFunction::iterator It = ++BB->getIterator();
13238
13239 MachineFunction *F = BB->getParent();
13240 MachineRegisterInfo &MRI = F->getRegInfo();
13241
13242 if (Subtarget.hasISEL() &&
13243 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13244 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13245 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13246 SmallVector<MachineOperand, 2> Cond;
13247 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13248 MI.getOpcode() == PPC::SELECT_CC_I8)
13249 Cond.push_back(MI.getOperand(4));
13250 else
13251 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13252 Cond.push_back(MI.getOperand(1));
13253
13254 DebugLoc dl = MI.getDebugLoc();
13255 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13256 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13257 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13258 // The incoming instruction knows the destination vreg to set, the
13259 // condition code register to branch on, the true/false values to
13260 // select between, and a branch opcode to use.
13261
13262 // thisMBB:
13263 // ...
13264 // TrueVal = ...
13265 // cmpTY ccX, r1, r2
13266 // bCC sinkMBB
13267 // fallthrough --> copy0MBB
13268 MachineBasicBlock *thisMBB = BB;
13269 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13270 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13271 DebugLoc dl = MI.getDebugLoc();
13272 F->insert(It, copy0MBB);
13273 F->insert(It, sinkMBB);
13274
13275 // Set the call frame size on entry to the new basic blocks.
13276 // See https://reviews.llvm.org/D156113.
13277 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13278 copy0MBB->setCallFrameSize(CallFrameSize);
13279 sinkMBB->setCallFrameSize(CallFrameSize);
13280
13281 // Transfer the remainder of BB and its successor edges to sinkMBB.
13282 sinkMBB->splice(sinkMBB->begin(), BB,
13283 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13284 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13285
13286 // Next, add the true and fallthrough blocks as its successors.
13287 BB->addSuccessor(copy0MBB);
13288 BB->addSuccessor(sinkMBB);
13289
13290 if (IsSelect(MI)) {
13291 BuildMI(BB, dl, TII->get(PPC::BC))
13292 .addReg(MI.getOperand(1).getReg())
13293 .addMBB(sinkMBB);
13294 } else {
13295 unsigned SelectPred = MI.getOperand(4).getImm();
13296 BuildMI(BB, dl, TII->get(PPC::BCC))
13297 .addImm(SelectPred)
13298 .addReg(MI.getOperand(1).getReg())
13299 .addMBB(sinkMBB);
13300 }
13301
13302 // copy0MBB:
13303 // %FalseValue = ...
13304 // # fallthrough to sinkMBB
13305 BB = copy0MBB;
13306
13307 // Update machine-CFG edges
13308 BB->addSuccessor(sinkMBB);
13309
13310 // sinkMBB:
13311 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13312 // ...
13313 BB = sinkMBB;
13314 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13315 .addReg(MI.getOperand(3).getReg())
13316 .addMBB(copy0MBB)
13317 .addReg(MI.getOperand(2).getReg())
13318 .addMBB(thisMBB);
13319 } else if (MI.getOpcode() == PPC::ReadTB) {
13320 // To read the 64-bit time-base register on a 32-bit target, we read the
13321 // two halves. Should the counter have wrapped while it was being read, we
13322 // need to try again.
13323 // ...
13324 // readLoop:
13325 // mfspr Rx,TBU # load from TBU
13326 // mfspr Ry,TB # load from TB
13327 // mfspr Rz,TBU # load from TBU
13328 // cmpw crX,Rx,Rz # check if 'old'='new'
13329 // bne readLoop # branch if they're not equal
13330 // ...
13331
13332 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13333 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13334 DebugLoc dl = MI.getDebugLoc();
13335 F->insert(It, readMBB);
13336 F->insert(It, sinkMBB);
13337
13338 // Transfer the remainder of BB and its successor edges to sinkMBB.
13339 sinkMBB->splice(sinkMBB->begin(), BB,
13340 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13341 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13342
13343 BB->addSuccessor(readMBB);
13344 BB = readMBB;
13345
13346 MachineRegisterInfo &RegInfo = F->getRegInfo();
13347 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13348 Register LoReg = MI.getOperand(0).getReg();
13349 Register HiReg = MI.getOperand(1).getReg();
13350
13351 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13352 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13353 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13354
13355 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13356
13357 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13358 .addReg(HiReg)
13359 .addReg(ReadAgainReg);
13360 BuildMI(BB, dl, TII->get(PPC::BCC))
13361 .addImm(PPC::PRED_NE)
13362 .addReg(CmpReg)
13363 .addMBB(readMBB);
13364
13365 BB->addSuccessor(readMBB);
13366 BB->addSuccessor(sinkMBB);
13367 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13368 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13369 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13370 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13371 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13372 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13373 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13374 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13375
13376 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13377 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13378 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13379 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13380 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13381 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13382 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13383 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13384
13385 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13386 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13387 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13388 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13389 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13390 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13391 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13392 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13393
13394 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13395 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13396 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13397 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13398 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13399 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13400 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13401 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13402
13403 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13404 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13405 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13406 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13407 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13408 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13409 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13410 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13411
13412 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13413 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13414 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13415 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13416 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13417 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13418 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13419 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13420
13421 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13422 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13423 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13424 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13425 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13426 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13427 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13428 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13429
13430 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13431 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13432 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13433 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13434 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13435 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13436 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13437 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13438
13439 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13440 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13441 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13442 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13443 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13444 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13445 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13446 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13447
13448 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13449 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13450 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13451 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13452 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13453 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13454 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13455 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13456
13457 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13458 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13459 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13460 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13461 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13462 BB = EmitAtomicBinary(MI, BB, 4, 0);
13463 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13464 BB = EmitAtomicBinary(MI, BB, 8, 0);
13465 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13466 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13467 (Subtarget.hasPartwordAtomics() &&
13468 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13469 (Subtarget.hasPartwordAtomics() &&
13470 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13471 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13472
13473 auto LoadMnemonic = PPC::LDARX;
13474 auto StoreMnemonic = PPC::STDCX;
13475 switch (MI.getOpcode()) {
13476 default:
13477 llvm_unreachable("Compare and swap of unknown size");
13478 case PPC::ATOMIC_CMP_SWAP_I8:
13479 LoadMnemonic = PPC::LBARX;
13480 StoreMnemonic = PPC::STBCX;
13481 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13482 break;
13483 case PPC::ATOMIC_CMP_SWAP_I16:
13484 LoadMnemonic = PPC::LHARX;
13485 StoreMnemonic = PPC::STHCX;
13486 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13487 break;
13488 case PPC::ATOMIC_CMP_SWAP_I32:
13489 LoadMnemonic = PPC::LWARX;
13490 StoreMnemonic = PPC::STWCX;
13491 break;
13492 case PPC::ATOMIC_CMP_SWAP_I64:
13493 LoadMnemonic = PPC::LDARX;
13494 StoreMnemonic = PPC::STDCX;
13495 break;
13496 }
13497 MachineRegisterInfo &RegInfo = F->getRegInfo();
13498 Register dest = MI.getOperand(0).getReg();
13499 Register ptrA = MI.getOperand(1).getReg();
13500 Register ptrB = MI.getOperand(2).getReg();
13501 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13502 Register oldval = MI.getOperand(3).getReg();
13503 Register newval = MI.getOperand(4).getReg();
13504 DebugLoc dl = MI.getDebugLoc();
13505
13506 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13507 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13508 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13509 F->insert(It, loop1MBB);
13510 F->insert(It, loop2MBB);
13511 F->insert(It, exitMBB);
13512 exitMBB->splice(exitMBB->begin(), BB,
13513 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13514 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13515
13516 // thisMBB:
13517 // ...
13518 // fallthrough --> loopMBB
13519 BB->addSuccessor(loop1MBB);
13520
13521 // loop1MBB:
13522 // l[bhwd]arx dest, ptr
13523 // cmp[wd] dest, oldval
13524 // bne- exitBB
13525 // loop2MBB:
13526 // st[bhwd]cx. newval, ptr
13527 // bne- loopMBB
13528 // b exitBB
13529 // exitBB:
13530 BB = loop1MBB;
13531 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13532 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13533 .addReg(dest)
13534 .addReg(oldval);
13535 BuildMI(BB, dl, TII->get(PPC::BCC))
13536 .addImm(PPC::PRED_NE)
13537 .addReg(CrReg)
13538 .addMBB(exitMBB);
13539 BB->addSuccessor(loop2MBB);
13540 BB->addSuccessor(exitMBB);
13541
13542 BB = loop2MBB;
13543 BuildMI(BB, dl, TII->get(StoreMnemonic))
13544 .addReg(newval)
13545 .addReg(ptrA)
13546 .addReg(ptrB);
13547 BuildMI(BB, dl, TII->get(PPC::BCC))
13548 .addImm(PPC::PRED_NE)
13549 .addReg(PPC::CR0)
13550 .addMBB(loop1MBB);
13551 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13552 BB->addSuccessor(loop1MBB);
13553 BB->addSuccessor(exitMBB);
13554
13555 // exitMBB:
13556 // ...
13557 BB = exitMBB;
13558 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13559 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13560 // We must use 64-bit registers for addresses when targeting 64-bit,
13561 // since we're actually doing arithmetic on them. Other registers
13562 // can be 32-bit.
13563 bool is64bit = Subtarget.isPPC64();
13564 bool isLittleEndian = Subtarget.isLittleEndian();
13565 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13566
13567 Register dest = MI.getOperand(0).getReg();
13568 Register ptrA = MI.getOperand(1).getReg();
13569 Register ptrB = MI.getOperand(2).getReg();
13570 Register oldval = MI.getOperand(3).getReg();
13571 Register newval = MI.getOperand(4).getReg();
13572 DebugLoc dl = MI.getDebugLoc();
13573
13574 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13575 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13576 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13577 F->insert(It, loop1MBB);
13578 F->insert(It, loop2MBB);
13579 F->insert(It, exitMBB);
13580 exitMBB->splice(exitMBB->begin(), BB,
13581 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13582 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13583
13584 MachineRegisterInfo &RegInfo = F->getRegInfo();
13585 const TargetRegisterClass *RC =
13586 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13587 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13588
13589 Register PtrReg = RegInfo.createVirtualRegister(RC);
13590 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13591 Register ShiftReg =
13592 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13593 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13594 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13595 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13596 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13597 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13598 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13599 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13600 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13601 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13602 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13603 Register Ptr1Reg;
13604 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13605 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13606 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13607 // thisMBB:
13608 // ...
13609 // fallthrough --> loopMBB
13610 BB->addSuccessor(loop1MBB);
13611
13612 // The 4-byte load must be aligned, while a char or short may be
13613 // anywhere in the word. Hence all this nasty bookkeeping code.
13614 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13615 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13616 // xori shift, shift1, 24 [16]
13617 // rlwinm ptr, ptr1, 0, 0, 29
13618 // slw newval2, newval, shift
13619 // slw oldval2, oldval,shift
13620 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13621 // slw mask, mask2, shift
13622 // and newval3, newval2, mask
13623 // and oldval3, oldval2, mask
13624 // loop1MBB:
13625 // lwarx tmpDest, ptr
13626 // and tmp, tmpDest, mask
13627 // cmpw tmp, oldval3
13628 // bne- exitBB
13629 // loop2MBB:
13630 // andc tmp2, tmpDest, mask
13631 // or tmp4, tmp2, newval3
13632 // stwcx. tmp4, ptr
13633 // bne- loop1MBB
13634 // b exitBB
13635 // exitBB:
13636 // srw dest, tmpDest, shift
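 // Editor's note -- an illustrative worked example, not part of the original
 // source: for a 16-bit cmpxchg at byte offset 2 of its aligned word on a
 // little-endian target, shift1 (and thus shift) is 16, so newval, oldval and
 // the 0xFFFF mask are shifted left by 16 and mask becomes 0xFFFF0000,
 // selecting the upper halfword of the word loaded by lwarx.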
13637 if (ptrA != ZeroReg) {
13638 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13639 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13640 .addReg(ptrA)
13641 .addReg(ptrB);
13642 } else {
13643 Ptr1Reg = ptrB;
13644 }
13645
13646 // We need to use the 32-bit subregister to avoid a register class mismatch
13647 // in 64-bit mode.
13648 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13649 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13650 .addImm(3)
13651 .addImm(27)
13652 .addImm(is8bit ? 28 : 27);
13653 if (!isLittleEndian)
13654 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13655 .addReg(Shift1Reg)
13656 .addImm(is8bit ? 24 : 16);
13657 if (is64bit)
13658 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13659 .addReg(Ptr1Reg)
13660 .addImm(0)
13661 .addImm(61);
13662 else
13663 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13664 .addReg(Ptr1Reg)
13665 .addImm(0)
13666 .addImm(0)
13667 .addImm(29);
13668 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13669 .addReg(newval)
13670 .addReg(ShiftReg);
13671 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13672 .addReg(oldval)
13673 .addReg(ShiftReg);
13674 if (is8bit)
13675 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13676 else {
13677 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13678 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13679 .addReg(Mask3Reg)
13680 .addImm(65535);
13681 }
13682 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13683 .addReg(Mask2Reg)
13684 .addReg(ShiftReg);
13685 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13686 .addReg(NewVal2Reg)
13687 .addReg(MaskReg);
13688 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13689 .addReg(OldVal2Reg)
13690 .addReg(MaskReg);
13691
13692 BB = loop1MBB;
13693 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13694 .addReg(ZeroReg)
13695 .addReg(PtrReg);
13696 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13697 .addReg(TmpDestReg)
13698 .addReg(MaskReg);
13699 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13700 .addReg(TmpReg)
13701 .addReg(OldVal3Reg);
13702 BuildMI(BB, dl, TII->get(PPC::BCC))
13703 .addImm(PPC::PRED_NE)
13704 .addReg(CrReg)
13705 .addMBB(exitMBB);
13706 BB->addSuccessor(loop2MBB);
13707 BB->addSuccessor(exitMBB);
13708
13709 BB = loop2MBB;
13710 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13711 .addReg(TmpDestReg)
13712 .addReg(MaskReg);
13713 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13714 .addReg(Tmp2Reg)
13715 .addReg(NewVal3Reg);
13716 BuildMI(BB, dl, TII->get(PPC::STWCX))
13717 .addReg(Tmp4Reg)
13718 .addReg(ZeroReg)
13719 .addReg(PtrReg);
13720 BuildMI(BB, dl, TII->get(PPC::BCC))
13721 .addImm(PPC::PRED_NE)
13722 .addReg(PPC::CR0)
13723 .addMBB(loop1MBB);
13724 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13725 BB->addSuccessor(loop1MBB);
13726 BB->addSuccessor(exitMBB);
13727
13728 // exitMBB:
13729 // ...
13730 BB = exitMBB;
13731 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13732 .addReg(TmpReg)
13733 .addReg(ShiftReg);
13734 } else if (MI.getOpcode() == PPC::FADDrtz) {
13735 // This pseudo performs an FADD with rounding mode temporarily forced
13736 // to round-to-zero. We emit this via custom inserter since the FPSCR
13737 // is not modeled at the SelectionDAG level.
13738 Register Dest = MI.getOperand(0).getReg();
13739 Register Src1 = MI.getOperand(1).getReg();
13740 Register Src2 = MI.getOperand(2).getReg();
13741 DebugLoc dl = MI.getDebugLoc();
13742
13743 MachineRegisterInfo &RegInfo = F->getRegInfo();
13744 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13745
13746 // Save FPSCR value.
13747 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13748
13749 // Set rounding mode to round-to-zero.
13750 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13751 .addImm(31)
13752 .addReg(PPC::RM, RegState::ImplicitDefine);
13753
13754 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13755 .addImm(30)
13756 .addReg(PPC::RM, RegState::ImplicitDefine);
13757
13758 // Perform addition.
13759 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13760 .addReg(Src1)
13761 .addReg(Src2);
13762 if (MI.getFlag(MachineInstr::NoFPExcept))
13763 MIB.setMIFlag(MachineInstr::NoFPExcept);
13764
13765 // Restore FPSCR value.
13766 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13767 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13768 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13769 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13770 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13771 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13772 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13773 ? PPC::ANDI8_rec
13774 : PPC::ANDI_rec;
13775 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13776 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13777
13778 MachineRegisterInfo &RegInfo = F->getRegInfo();
13779 Register Dest = RegInfo.createVirtualRegister(
13780 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13781
13782 DebugLoc Dl = MI.getDebugLoc();
13783 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13784 .addReg(MI.getOperand(1).getReg())
13785 .addImm(1);
13786 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13787 MI.getOperand(0).getReg())
13788 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13789 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13790 DebugLoc Dl = MI.getDebugLoc();
13791 MachineRegisterInfo &RegInfo = F->getRegInfo();
13792 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13793 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13794 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13795 MI.getOperand(0).getReg())
13796 .addReg(CRReg);
13797 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13798 DebugLoc Dl = MI.getDebugLoc();
13799 unsigned Imm = MI.getOperand(1).getImm();
13800 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13801 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13802 MI.getOperand(0).getReg())
13803 .addReg(PPC::CR0EQ);
13804 } else if (MI.getOpcode() == PPC::SETRNDi) {
13805 DebugLoc dl = MI.getDebugLoc();
13806 Register OldFPSCRReg = MI.getOperand(0).getReg();
13807
13808 // Save FPSCR value.
13809 if (MRI.use_empty(OldFPSCRReg))
13810 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13811 else
13812 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13813
13814 // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
13815 // the following settings:
13816 // 00 Round to nearest
13817 // 01 Round to 0
13818 // 10 Round to +inf
13819 // 11 Round to -inf
13820
13821 // When the operand is an immediate, use its two least significant bits to
13822 // set bits 62:63 of the FPSCR.
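 // Editor's note -- an illustrative example, not part of the original source:
 // an immediate of 2 (round to +inf) emits MTFSB0 31 (clearing FPSCR bit 63)
 // followed by MTFSB1 30 (setting FPSCR bit 62), leaving RN = 0b10.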
13823 unsigned Mode = MI.getOperand(1).getImm();
13824 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13825 .addImm(31)
13826 .addReg(PPC::RM, RegState::ImplicitDefine);
13827
13828 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13829 .addImm(30)
13830 .addReg(PPC::RM, RegState::ImplicitDefine);
13831 } else if (MI.getOpcode() == PPC::SETRND) {
13832 DebugLoc dl = MI.getDebugLoc();
13833
13834 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13835 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13836 // If the target doesn't have DirectMove, we use the stack to do the
13837 // conversion, because the target doesn't have instructions like mtvsrd or
13838 // mfvsrd to do this conversion directly.
13839 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13840 if (Subtarget.hasDirectMove()) {
13841 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13842 .addReg(SrcReg);
13843 } else {
13844 // Use stack to do the register copy.
13845 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13846 MachineRegisterInfo &RegInfo = F->getRegInfo();
13847 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13848 if (RC == &PPC::F8RCRegClass) {
13849 // Copy register from F8RCRegClass to G8RCRegclass.
13850 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13851 "Unsupported RegClass.");
13852
13853 StoreOp = PPC::STFD;
13854 LoadOp = PPC::LD;
13855 } else {
13856 // Copy register from G8RCRegClass to F8RCRegclass.
13857 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13858 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13859 "Unsupported RegClass.");
13860 }
13861
13862 MachineFrameInfo &MFI = F->getFrameInfo();
13863 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13864
13865 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13866 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13867 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13868 MFI.getObjectAlign(FrameIdx));
13869
13870 // Store the SrcReg into the stack.
13871 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13872 .addReg(SrcReg)
13873 .addImm(0)
13874 .addFrameIndex(FrameIdx)
13875 .addMemOperand(MMOStore);
13876
13877 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13878 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13879 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13880 MFI.getObjectAlign(FrameIdx));
13881
13882 // Load from the stack slot where SrcReg was stored into DestReg, completing
13883 // the register class conversion from the class of SrcReg to the class of
13884 // DestReg.
13885 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13886 .addImm(0)
13887 .addFrameIndex(FrameIdx)
13888 .addMemOperand(MMOLoad);
13889 }
13890 };
13891
13892 Register OldFPSCRReg = MI.getOperand(0).getReg();
13893
13894 // Save FPSCR value.
13895 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13896
13897 // When the operand is a gprc register, use its two least significant bits and
13898 // the mtfsf instruction to set bits 62:63 of the FPSCR.
13899 //
13900 // copy OldFPSCRTmpReg, OldFPSCRReg
13901 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13902 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13903 // copy NewFPSCRReg, NewFPSCRTmpReg
13904 // mtfsf 255, NewFPSCRReg
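 // Editor's note -- an illustrative example, not part of the original source:
 // if the gprc operand holds 1 (round to 0), the rldimi keeps bits 0:61 of
 // the old FPSCR value and inserts 0b01 into bits 62:63 before mtfsf writes
 // the result back to the FPSCR.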
13905 MachineOperand SrcOp = MI.getOperand(1);
13906 MachineRegisterInfo &RegInfo = F->getRegInfo();
13907 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13908
13909 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13910
13911 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13912 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13913
13914 // The first operand of INSERT_SUBREG should be a register that has
13915 // subregisters; since we only care about its register class, we use an
13916 // IMPLICIT_DEF register.
13917 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13918 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13919 .addReg(ImDefReg)
13920 .add(SrcOp)
13921 .addImm(1);
13922
13923 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13924 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13925 .addReg(OldFPSCRTmpReg)
13926 .addReg(ExtSrcReg)
13927 .addImm(0)
13928 .addImm(62);
13929
13930 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13931 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13932
13933 // The mask 255 means that bits 32:63 of NewFPSCRReg are written into bits
13934 // 32:63 of the FPSCR.
13935 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13936 .addImm(255)
13937 .addReg(NewFPSCRReg)
13938 .addImm(0)
13939 .addImm(0);
13940 } else if (MI.getOpcode() == PPC::SETFLM) {
13941 DebugLoc Dl = MI.getDebugLoc();
13942
13943 // The result of setflm is the previous FPSCR content, so we need to save it first.
13944 Register OldFPSCRReg = MI.getOperand(0).getReg();
13945 if (MRI.use_empty(OldFPSCRReg))
13946 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13947 else
13948 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13949
13950 // Put bits in 32:63 to FPSCR.
13951 Register NewFPSCRReg = MI.getOperand(1).getReg();
13952 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13953 .addImm(255)
13954 .addReg(NewFPSCRReg)
13955 .addImm(0)
13956 .addImm(0);
13957 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13958 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13959 return emitProbedAlloca(MI, BB);
13960 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13961 DebugLoc DL = MI.getDebugLoc();
13962 Register Src = MI.getOperand(2).getReg();
13963 Register Lo = MI.getOperand(0).getReg();
13964 Register Hi = MI.getOperand(1).getReg();
13965 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13966 .addDef(Lo)
13967 .addUse(Src, 0, PPC::sub_gp8_x1);
13968 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13969 .addDef(Hi)
13970 .addUse(Src, 0, PPC::sub_gp8_x0);
13971 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13972 MI.getOpcode() == PPC::STQX_PSEUDO) {
13973 DebugLoc DL = MI.getDebugLoc();
13974 // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and holds
13975 // the result of adding RA and RB, so it has to be in the
13976 // g8rc_and_g8rc_nox0 register class.
13977 Register Ptr =
13978 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13979 Register Val = MI.getOperand(0).getReg();
13980 Register RA = MI.getOperand(1).getReg();
13981 Register RB = MI.getOperand(2).getReg();
13982 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13983 BuildMI(*BB, MI, DL,
13984 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13985 : TII->get(PPC::STQ))
13986 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13987 .addImm(0)
13988 .addReg(Ptr);
13989 } else {
13990 llvm_unreachable("Unexpected instr type to insert");
13991 }
13992
13993 MI.eraseFromParent(); // The pseudo instruction is gone now.
13994 return BB;
13995}
13996
13997//===----------------------------------------------------------------------===//
13998// Target Optimization Hooks
13999//===----------------------------------------------------------------------===//
14000
14001static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14002 // For the estimates, convergence is quadratic, so we essentially double the
14003 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14004 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14005 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
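 // Editor's note -- an illustrative check of these values, not part of the
 // original source: starting from 2^-14 (hasRecipPrec), one step reaches
 // ~2^-28 (enough for f32) and two reach ~2^-56 (enough for f64); starting
 // from 2^-5, three steps reach ~2^-40 for f32 and four reach ~2^-80 for f64,
 // matching the 1/2/3/4 steps computed below.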
14006 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14007 if (VT.getScalarType() == MVT::f64)
14008 RefinementSteps++;
14009 return RefinementSteps;
14010}
14011
14012SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14013 const DenormalMode &Mode) const {
14014 // We only have VSX Vector Test for software Square Root.
14015 EVT VT = Op.getValueType();
14016 if (!isTypeLegal(MVT::i1) ||
14017 (VT != MVT::f64 &&
14018 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14019 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
14020
14021 SDLoc DL(Op);
14022 // The output register of FTSQRT is a CR field.
14023 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
14024 // ftsqrt BF,FRB
14025 // Let e_b be the unbiased exponent of the double-precision
14026 // floating-point operand in register FRB.
14027 // fe_flag is set to 1 if either of the following conditions occurs.
14028 // - The double-precision floating-point operand in register FRB is a zero,
14029 // a NaN, or an infinity, or a negative value.
14030 // - e_b is less than or equal to -970.
14031 // Otherwise fe_flag is set to 0.
14032 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14033 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14034 // exponent is less than -970)
14035 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14036 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14037 FTSQRT, SRIdxVal),
14038 0);
14039}
14040
14041SDValue
14042PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14043 SelectionDAG &DAG) const {
14044 // We only have VSX Vector Square Root.
14045 EVT VT = Op.getValueType();
14046 if (VT != MVT::f64 &&
14047 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14048 return SDValue();
14049
14050 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14051}
14052
14053SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14054 int Enabled, int &RefinementSteps,
14055 bool &UseOneConstNR,
14056 bool Reciprocal) const {
14057 EVT VT = Operand.getValueType();
14058 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14059 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14060 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14061 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14062 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14063 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14064
14065 // The Newton-Raphson computation with a single constant does not provide
14066 // enough accuracy on some CPUs.
14067 UseOneConstNR = !Subtarget.needsTwoConstNR();
14068 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14069 }
14070 return SDValue();
14071}
14072
14073SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14074 int Enabled,
14075 int &RefinementSteps) const {
14076 EVT VT = Operand.getValueType();
14077 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14078 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14079 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14080 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14081 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14082 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14083 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14084 }
14085 return SDValue();
14086}
14087
14088unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14089 // Note: This functionality is used only when unsafe-fp-math is enabled, and
14090 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
14091 // enabled for division), this functionality is redundant with the default
14092 // combiner logic (once the division -> reciprocal/multiply transformation
14093 // has taken place). As a result, this matters more for older cores than for
14094 // newer ones.
14095
14096 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14097 // reciprocal if there are two or more FDIVs (for embedded cores with only
14098 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
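 // For example, with a threshold of 2 a block containing
 //   a = x / d; b = y / d;
 // is rewritten (under unsafe-fp-math) as
 //   t = 1.0 / d; a = x * t; b = y * t;
 // replacing the repeated divides with multiplies by the shared reciprocal.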
14099 switch (Subtarget.getCPUDirective()) {
14100 default:
14101 return 3;
14102 case PPC::DIR_440:
14103 case PPC::DIR_A2:
14104 case PPC::DIR_E500:
14105 case PPC::DIR_E500mc:
14106 case PPC::DIR_E5500:
14107 return 2;
14108 }
14109}
14110
14111// isConsecutiveLSLoc needs to work even if all adds have not yet been
14112// collapsed, and so we need to look through chains of them.
14113 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14114 int64_t& Offset, SelectionDAG &DAG) {
14115 if (DAG.isBaseWithConstantOffset(Loc)) {
14116 Base = Loc.getOperand(0);
14117 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14118
14119 // The base might itself be a base plus an offset, and if so, accumulate
14120 // that as well.
14121 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14122 }
14123}
14124
14125 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14126 unsigned Bytes, int Dist,
14127 SelectionDAG &DAG) {
14128 if (VT.getSizeInBits() / 8 != Bytes)
14129 return false;
14130
14131 SDValue BaseLoc = Base->getBasePtr();
14132 if (Loc.getOpcode() == ISD::FrameIndex) {
14133 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14134 return false;
14135 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14136 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
14137 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14138 int FS = MFI.getObjectSize(FI);
14139 int BFS = MFI.getObjectSize(BFI);
14140 if (FS != BFS || FS != (int)Bytes) return false;
14141 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14142 }
14143
14144 SDValue Base1 = Loc, Base2 = BaseLoc;
14145 int64_t Offset1 = 0, Offset2 = 0;
14146 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14147 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14148 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14149 return true;
14150
14151 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14152 const GlobalValue *GV1 = nullptr;
14153 const GlobalValue *GV2 = nullptr;
14154 Offset1 = 0;
14155 Offset2 = 0;
14156 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14157 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14158 if (isGA1 && isGA2 && GV1 == GV2)
14159 return Offset1 == (Offset2 + Dist*Bytes);
14160 return false;
14161}
14162
14163// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14164// not enforce equality of the chain operands.
14165 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14166 unsigned Bytes, int Dist,
14167 SelectionDAG &DAG) {
14168 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
14169 EVT VT = LS->getMemoryVT();
14170 SDValue Loc = LS->getBasePtr();
14171 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14172 }
14173
14174 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14175 EVT VT;
14176 switch (N->getConstantOperandVal(1)) {
14177 default: return false;
14178 case Intrinsic::ppc_altivec_lvx:
14179 case Intrinsic::ppc_altivec_lvxl:
14180 case Intrinsic::ppc_vsx_lxvw4x:
14181 case Intrinsic::ppc_vsx_lxvw4x_be:
14182 VT = MVT::v4i32;
14183 break;
14184 case Intrinsic::ppc_vsx_lxvd2x:
14185 case Intrinsic::ppc_vsx_lxvd2x_be:
14186 VT = MVT::v2f64;
14187 break;
14188 case Intrinsic::ppc_altivec_lvebx:
14189 VT = MVT::i8;
14190 break;
14191 case Intrinsic::ppc_altivec_lvehx:
14192 VT = MVT::i16;
14193 break;
14194 case Intrinsic::ppc_altivec_lvewx:
14195 VT = MVT::i32;
14196 break;
14197 }
14198
14199 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14200 }
14201
14202 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14203 EVT VT;
14204 switch (N->getConstantOperandVal(1)) {
14205 default: return false;
14206 case Intrinsic::ppc_altivec_stvx:
14207 case Intrinsic::ppc_altivec_stvxl:
14208 case Intrinsic::ppc_vsx_stxvw4x:
14209 VT = MVT::v4i32;
14210 break;
14211 case Intrinsic::ppc_vsx_stxvd2x:
14212 VT = MVT::v2f64;
14213 break;
14214 case Intrinsic::ppc_vsx_stxvw4x_be:
14215 VT = MVT::v4i32;
14216 break;
14217 case Intrinsic::ppc_vsx_stxvd2x_be:
14218 VT = MVT::v2f64;
14219 break;
14220 case Intrinsic::ppc_altivec_stvebx:
14221 VT = MVT::i8;
14222 break;
14223 case Intrinsic::ppc_altivec_stvehx:
14224 VT = MVT::i16;
14225 break;
14226 case Intrinsic::ppc_altivec_stvewx:
14227 VT = MVT::i32;
14228 break;
14229 }
14230
14231 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14232 }
14233
14234 return false;
14235}
14236
14237 // Return true if there is a nearby consecutive load to the one provided
14238 // (regardless of alignment). We search up and down the chain, looking through
14239// token factors and other loads (but nothing else). As a result, a true result
14240// indicates that it is safe to create a new consecutive load adjacent to the
14241// load provided.
14242 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14243 SDValue Chain = LD->getChain();
14244 EVT VT = LD->getMemoryVT();
14245
14246 SmallSet<SDNode *, 16> LoadRoots;
14247 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14248 SmallSet<SDNode *, 16> Visited;
14249
14250 // First, search up the chain, branching to follow all token-factor operands.
14251 // If we find a consecutive load, then we're done, otherwise, record all
14252 // nodes just above the top-level loads and token factors.
14253 while (!Queue.empty()) {
14254 SDNode *ChainNext = Queue.pop_back_val();
14255 if (!Visited.insert(ChainNext).second)
14256 continue;
14257
14258 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14259 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14260 return true;
14261
14262 if (!Visited.count(ChainLD->getChain().getNode()))
14263 Queue.push_back(ChainLD->getChain().getNode());
14264 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14265 for (const SDUse &O : ChainNext->ops())
14266 if (!Visited.count(O.getNode()))
14267 Queue.push_back(O.getNode());
14268 } else
14269 LoadRoots.insert(ChainNext);
14270 }
14271
14272 // Second, search down the chain, starting from the top-level nodes recorded
14273 // in the first phase. These top-level nodes are the nodes just above all
14274 // loads and token factors. Starting with their uses, recursively look through
14275 // all loads (just the chain uses) and token factors to find a consecutive
14276 // load.
14277 Visited.clear();
14278 Queue.clear();
14279
14280 for (SDNode *I : LoadRoots) {
14281 Queue.push_back(I);
14282
14283 while (!Queue.empty()) {
14284 SDNode *LoadRoot = Queue.pop_back_val();
14285 if (!Visited.insert(LoadRoot).second)
14286 continue;
14287
14288 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14289 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14290 return true;
14291
14292 for (SDNode *U : LoadRoot->users())
14293 if (((isa<MemSDNode>(U) &&
14294 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14295 U->getOpcode() == ISD::TokenFactor) &&
14296 !Visited.count(U))
14297 Queue.push_back(U);
14298 }
14299 }
14300
14301 return false;
14302}
14303
14304/// This function is called when we have proved that a SETCC node can be replaced
14305/// by subtraction (and other supporting instructions) so that the result of
14306/// comparison is kept in a GPR instead of CR. This function is purely for
14307/// codegen purposes and has some flags to guide the codegen process.
14308static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14309 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14310 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14311
14312 // Zero extend the operands to the largest legal integer. Originally, they
14313 // must be of a strictly smaller size.
14314 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14315 DAG.getConstant(Size, DL, MVT::i32));
14316 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14317 DAG.getConstant(Size, DL, MVT::i32));
14318
14319 // Swap if needed. Depends on the condition code.
14320 if (Swap)
14321 std::swap(Op0, Op1);
14322
14323 // Subtract extended integers.
14324 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14325
14326 // Move the sign bit to the least significant position and zero out the rest.
14327 // Now the least significant bit carries the result of the original comparison.
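 // For example, for SETULT on i32 operands x = 1 and y = 2 (zero extended to
 // i64), the subtraction is negative, so bit 63 is set; shifting right by
 // Size - 1 leaves exactly that bit, producing the i1 result 1.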
14328 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14329 DAG.getConstant(Size - 1, DL, MVT::i32));
14330 auto Final = Shifted;
14331
14332 // Complement the result if needed. Based on the condition code.
14333 if (Complement)
14334 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14335 DAG.getConstant(1, DL, MVT::i64));
14336
14337 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14338}
14339
14340SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14341 DAGCombinerInfo &DCI) const {
14342 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14343
14344 SelectionDAG &DAG = DCI.DAG;
14345 SDLoc DL(N);
14346
14347 // Size of integers being compared has a critical role in the following
14348 // analysis, so we prefer to do this when all types are legal.
14349 if (!DCI.isAfterLegalizeDAG())
14350 return SDValue();
14351
14352 // If all users of SETCC extend its value to a legal integer type
14353 // then we replace SETCC with a subtraction
14354 for (const SDNode *U : N->users())
14355 if (U->getOpcode() != ISD::ZERO_EXTEND)
14356 return SDValue();
14357
14358 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14359 auto OpSize = N->getOperand(0).getValueSizeInBits();
14360
14361 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14362
14363 if (OpSize < Size) {
14364 switch (CC) {
14365 default: break;
14366 case ISD::SETULT:
14367 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14368 case ISD::SETULE:
14369 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14370 case ISD::SETUGT:
14371 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14372 case ISD::SETUGE:
14373 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14374 }
14375 }
14376
14377 return SDValue();
14378}
14379
14380SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14381 DAGCombinerInfo &DCI) const {
14382 SelectionDAG &DAG = DCI.DAG;
14383 SDLoc dl(N);
14384
14385 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14386 // If we're tracking CR bits, we need to be careful that we don't have:
14387 // trunc(binary-ops(zext(x), zext(y)))
14388 // or
14389 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14390 // such that we're unnecessarily moving things into GPRs when it would be
14391 // better to keep them in CR bits.
14392
14393 // Note that trunc here can be an actual i1 trunc, or can be the effective
14394 // truncation that comes from a setcc or select_cc.
14395 if (N->getOpcode() == ISD::TRUNCATE &&
14396 N->getValueType(0) != MVT::i1)
14397 return SDValue();
14398
14399 if (N->getOperand(0).getValueType() != MVT::i32 &&
14400 N->getOperand(0).getValueType() != MVT::i64)
14401 return SDValue();
14402
14403 if (N->getOpcode() == ISD::SETCC ||
14404 N->getOpcode() == ISD::SELECT_CC) {
14405 // If we're looking at a comparison, then we need to make sure that the
14406 // high bits (all except for the first) don't affect the result.
14407 ISD::CondCode CC =
14408 cast<CondCodeSDNode>(N->getOperand(
14409 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14410 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14411
14412 if (ISD::isSignedIntSetCC(CC)) {
14413 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14414 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14415 return SDValue();
14416 } else if (ISD::isUnsignedIntSetCC(CC)) {
14417 if (!DAG.MaskedValueIsZero(N->getOperand(0),
14418 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14419 !DAG.MaskedValueIsZero(N->getOperand(1),
14420 APInt::getHighBitsSet(OpBits, OpBits-1)))
14421 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14422 : SDValue());
14423 } else {
14424 // This is neither a signed nor an unsigned comparison, just make sure
14425 // that the high bits are equal.
14426 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14427 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14428
14429 // We don't really care about what is known about the first bit (if
14430 // anything), so pretend that it is known zero for both to ensure they can
14431 // be compared as constants.
14432 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14433 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14434
14435 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14436 Op1Known.getConstant() != Op2Known.getConstant())
14437 return SDValue();
14438 }
14439 }
14440
14441 // We now know that the higher-order bits are irrelevant, we just need to
14442 // make sure that all of the intermediate operations are bit operations, and
14443 // all inputs are extensions.
14444 if (N->getOperand(0).getOpcode() != ISD::AND &&
14445 N->getOperand(0).getOpcode() != ISD::OR &&
14446 N->getOperand(0).getOpcode() != ISD::XOR &&
14447 N->getOperand(0).getOpcode() != ISD::SELECT &&
14448 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14449 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14450 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14451 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14452 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14453 return SDValue();
14454
14455 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14456 N->getOperand(1).getOpcode() != ISD::AND &&
14457 N->getOperand(1).getOpcode() != ISD::OR &&
14458 N->getOperand(1).getOpcode() != ISD::XOR &&
14459 N->getOperand(1).getOpcode() != ISD::SELECT &&
14460 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14461 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14462 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14463 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14464 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14465 return SDValue();
14466
14467 SmallVector<SDValue, 4> Inputs;
14468 SmallVector<SDValue, 8> BinOps, PromOps;
14469 SmallPtrSet<SDNode *, 16> Visited;
14470
14471 for (unsigned i = 0; i < 2; ++i) {
14472 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14473 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14474 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14475 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14476 isa<ConstantSDNode>(N->getOperand(i)))
14477 Inputs.push_back(N->getOperand(i));
14478 else
14479 BinOps.push_back(N->getOperand(i));
14480
14481 if (N->getOpcode() == ISD::TRUNCATE)
14482 break;
14483 }
14484
14485 // Visit all inputs, collect all binary operations (and, or, xor and
14486 // select) that are all fed by extensions.
14487 while (!BinOps.empty()) {
14488 SDValue BinOp = BinOps.pop_back_val();
14489
14490 if (!Visited.insert(BinOp.getNode()).second)
14491 continue;
14492
14493 PromOps.push_back(BinOp);
14494
14495 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14496 // The condition of the select is not promoted.
14497 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14498 continue;
14499 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14500 continue;
14501
14502 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14503 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14504 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14505 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14506 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14507 Inputs.push_back(BinOp.getOperand(i));
14508 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14509 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14510 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14511 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14512 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14513 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14514 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14515 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14516 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14517 BinOps.push_back(BinOp.getOperand(i));
14518 } else {
14519 // We have an input that is not an extension or another binary
14520 // operation; we'll abort this transformation.
14521 return SDValue();
14522 }
14523 }
14524 }
14525
14526 // Make sure that this is a self-contained cluster of operations (which
14527 // is not quite the same thing as saying that everything has only one
14528 // use).
14529 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14530 if (isa<ConstantSDNode>(Inputs[i]))
14531 continue;
14532
14533 for (const SDNode *User : Inputs[i].getNode()->users()) {
14534 if (User != N && !Visited.count(User))
14535 return SDValue();
14536
14537 // Make sure that we're not going to promote the non-output-value
14538 // operand(s) of SELECT or SELECT_CC.
14539 // FIXME: Although we could sometimes handle this, and it does occur in
14540 // practice that one of the condition inputs to the select is also one of
14541 // the outputs, we currently can't deal with this.
14542 if (User->getOpcode() == ISD::SELECT) {
14543 if (User->getOperand(0) == Inputs[i])
14544 return SDValue();
14545 } else if (User->getOpcode() == ISD::SELECT_CC) {
14546 if (User->getOperand(0) == Inputs[i] ||
14547 User->getOperand(1) == Inputs[i])
14548 return SDValue();
14549 }
14550 }
14551 }
14552
14553 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14554 for (const SDNode *User : PromOps[i].getNode()->users()) {
14555 if (User != N && !Visited.count(User))
14556 return SDValue();
14557
14558 // Make sure that we're not going to promote the non-output-value
14559 // operand(s) of SELECT or SELECT_CC.
14560 // FIXME: Although we could sometimes handle this, and it does occur in
14561 // practice that one of the condition inputs to the select is also one of
14562 // the outputs, we currently can't deal with this.
14563 if (User->getOpcode() == ISD::SELECT) {
14564 if (User->getOperand(0) == PromOps[i])
14565 return SDValue();
14566 } else if (User->getOpcode() == ISD::SELECT_CC) {
14567 if (User->getOperand(0) == PromOps[i] ||
14568 User->getOperand(1) == PromOps[i])
14569 return SDValue();
14570 }
14571 }
14572 }
14573
14574 // Replace all inputs with the extension operand.
14575 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14576 // Constants may have users outside the cluster of to-be-promoted nodes,
14577 // and so we need to replace those as we do the promotions.
14578 if (isa<ConstantSDNode>(Inputs[i]))
14579 continue;
14580 else
14581 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14582 }
14583
14584 std::list<HandleSDNode> PromOpHandles;
14585 for (auto &PromOp : PromOps)
14586 PromOpHandles.emplace_back(PromOp);
14587
14588 // Replace all operations (these are all the same, but have a different
14589 // (i1) return type). DAG.getNode will validate that the types of
14590 // a binary operator match, so go through the list in reverse so that
14591 // we've likely promoted both operands first. Any intermediate truncations or
14592 // extensions disappear.
14593 while (!PromOpHandles.empty()) {
14594 SDValue PromOp = PromOpHandles.back().getValue();
14595 PromOpHandles.pop_back();
14596
14597 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14598 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14599 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14600 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14601 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14602 PromOp.getOperand(0).getValueType() != MVT::i1) {
14603 // The operand is not yet ready (see comment below).
14604 PromOpHandles.emplace_front(PromOp);
14605 continue;
14606 }
14607
14608 SDValue RepValue = PromOp.getOperand(0);
14609 if (isa<ConstantSDNode>(RepValue))
14610 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14611
14612 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14613 continue;
14614 }
14615
14616 unsigned C;
14617 switch (PromOp.getOpcode()) {
14618 default: C = 0; break;
14619 case ISD::SELECT: C = 1; break;
14620 case ISD::SELECT_CC: C = 2; break;
14621 }
14622
14623 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14624 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14625 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14626 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14627 // The to-be-promoted operands of this node have not yet been
14628 // promoted (this should be rare because we're going through the
14629 // list backward, but if one of the operands has several users in
14630 // this cluster of to-be-promoted nodes, it is possible).
14631 PromOpHandles.emplace_front(PromOp);
14632 continue;
14633 }
14634
14635 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
14636
14637 // If there are any constant inputs, make sure they're replaced now.
14638 for (unsigned i = 0; i < 2; ++i)
14639 if (isa<ConstantSDNode>(Ops[C+i]))
14640 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14641
14642 DAG.ReplaceAllUsesOfValueWith(PromOp,
14643 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14644 }
14645
14646 // Now we're left with the initial truncation itself.
14647 if (N->getOpcode() == ISD::TRUNCATE)
14648 return N->getOperand(0);
14649
14650 // Otherwise, this is a comparison. The operands to be compared have just
14651 // changed type (to i1), but everything else is the same.
14652 return SDValue(N, 0);
14653}
14654
14655SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14656 DAGCombinerInfo &DCI) const {
14657 SelectionDAG &DAG = DCI.DAG;
14658 SDLoc dl(N);
14659
14660 // If we're tracking CR bits, we need to be careful that we don't have:
14661 // zext(binary-ops(trunc(x), trunc(y)))
14662 // or
14663 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
14664 // such that we're unnecessarily moving things into CR bits that can more
14665 // efficiently stay in GPRs. Note that if we're not certain that the high
14666 // bits are set as required by the final extension, we still may need to do
14667 // some masking to get the proper behavior.
14668
14669 // This same functionality is important on PPC64 when dealing with
14670 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14671 // the return values of functions. Because it is so similar, it is handled
14672 // here as well.
14673
14674 if (N->getValueType(0) != MVT::i32 &&
14675 N->getValueType(0) != MVT::i64)
14676 return SDValue();
14677
14678 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14679 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14680 return SDValue();
14681
14682 if (N->getOperand(0).getOpcode() != ISD::AND &&
14683 N->getOperand(0).getOpcode() != ISD::OR &&
14684 N->getOperand(0).getOpcode() != ISD::XOR &&
14685 N->getOperand(0).getOpcode() != ISD::SELECT &&
14686 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14687 return SDValue();
14688
14689 SmallVector<SDValue, 4> Inputs;
14690 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14691 SmallPtrSet<SDNode *, 16> Visited;
14692
14693 // Visit all inputs, collect all binary operations (and, or, xor and
14694 // select) that are all fed by truncations.
14695 while (!BinOps.empty()) {
14696 SDValue BinOp = BinOps.pop_back_val();
14697
14698 if (!Visited.insert(BinOp.getNode()).second)
14699 continue;
14700
14701 PromOps.push_back(BinOp);
14702
14703 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14704 // The condition of the select is not promoted.
14705 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14706 continue;
14707 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14708 continue;
14709
14710 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14711 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14712 Inputs.push_back(BinOp.getOperand(i));
14713 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14714 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14715 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14716 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14717 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14718 BinOps.push_back(BinOp.getOperand(i));
14719 } else {
14720 // We have an input that is not a truncation or another binary
14721 // operation; we'll abort this transformation.
14722 return SDValue();
14723 }
14724 }
14725 }
14726
14727 // The operands of a select that must be truncated when the select is
14728 // promoted because the operand is actually part of the to-be-promoted set.
14729 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14730
14731 // Make sure that this is a self-contained cluster of operations (which
14732 // is not quite the same thing as saying that everything has only one
14733 // use).
14734 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14735 if (isa<ConstantSDNode>(Inputs[i]))
14736 continue;
14737
14738 for (SDNode *User : Inputs[i].getNode()->users()) {
14739 if (User != N && !Visited.count(User))
14740 return SDValue();
14741
14742 // If we're going to promote the non-output-value operand(s) of SELECT or
14743 // SELECT_CC, record them for truncation.
14744 if (User->getOpcode() == ISD::SELECT) {
14745 if (User->getOperand(0) == Inputs[i])
14746 SelectTruncOp[0].insert(std::make_pair(User,
14747 User->getOperand(0).getValueType()));
14748 } else if (User->getOpcode() == ISD::SELECT_CC) {
14749 if (User->getOperand(0) == Inputs[i])
14750 SelectTruncOp[0].insert(std::make_pair(User,
14751 User->getOperand(0).getValueType()));
14752 if (User->getOperand(1) == Inputs[i])
14753 SelectTruncOp[1].insert(std::make_pair(User,
14754 User->getOperand(1).getValueType()));
14755 }
14756 }
14757 }
14758
14759 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14760 for (SDNode *User : PromOps[i].getNode()->users()) {
14761 if (User != N && !Visited.count(User))
14762 return SDValue();
14763
14764 // If we're going to promote the non-output-value operand(s) of SELECT or
14765 // SELECT_CC, record them for truncation.
14766 if (User->getOpcode() == ISD::SELECT) {
14767 if (User->getOperand(0) == PromOps[i])
14768 SelectTruncOp[0].insert(std::make_pair(User,
14769 User->getOperand(0).getValueType()));
14770 } else if (User->getOpcode() == ISD::SELECT_CC) {
14771 if (User->getOperand(0) == PromOps[i])
14772 SelectTruncOp[0].insert(std::make_pair(User,
14773 User->getOperand(0).getValueType()));
14774 if (User->getOperand(1) == PromOps[i])
14775 SelectTruncOp[1].insert(std::make_pair(User,
14776 User->getOperand(1).getValueType()));
14777 }
14778 }
14779 }
14780
14781 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14782 bool ReallyNeedsExt = false;
14783 if (N->getOpcode() != ISD::ANY_EXTEND) {
14784 // If not all of the inputs are already sign/zero extended, then
14785 // we'll still need to do that at the end.
14786 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14787 if (isa<ConstantSDNode>(Inputs[i]))
14788 continue;
14789
14790 unsigned OpBits =
14791 Inputs[i].getOperand(0).getValueSizeInBits();
14792 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14793
14794 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14795 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14796 APInt::getHighBitsSet(OpBits,
14797 OpBits-PromBits))) ||
14798 (N->getOpcode() == ISD::SIGN_EXTEND &&
14799 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14800 (OpBits-(PromBits-1)))) {
14801 ReallyNeedsExt = true;
14802 break;
14803 }
14804 }
14805 }
14806
14807 // Replace all inputs, either with the truncation operand, or a
14808 // truncation or extension to the final output type.
14809 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14810 // Constant inputs need to be replaced with the to-be-promoted nodes that
14811 // use them because they might have users outside of the cluster of
14812 // promoted nodes.
14813 if (isa<ConstantSDNode>(Inputs[i]))
14814 continue;
14815
14816 SDValue InSrc = Inputs[i].getOperand(0);
14817 if (Inputs[i].getValueType() == N->getValueType(0))
14818 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14819 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14820 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14821 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14822 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14823 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14824 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14825 else
14826 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14827 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14828 }
14829
14830 std::list<HandleSDNode> PromOpHandles;
14831 for (auto &PromOp : PromOps)
14832 PromOpHandles.emplace_back(PromOp);
14833
14834 // Replace all operations (these are all the same, but have a different
14835 // (promoted) return type). DAG.getNode will validate that the types of
14836 // a binary operator match, so go through the list in reverse so that
14837 // we've likely promoted both operands first.
14838 while (!PromOpHandles.empty()) {
14839 SDValue PromOp = PromOpHandles.back().getValue();
14840 PromOpHandles.pop_back();
14841
14842 unsigned C;
14843 switch (PromOp.getOpcode()) {
14844 default: C = 0; break;
14845 case ISD::SELECT: C = 1; break;
14846 case ISD::SELECT_CC: C = 2; break;
14847 }
14848
14849 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14850 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14851 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14852 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14853 // The to-be-promoted operands of this node have not yet been
14854 // promoted (this should be rare because we're going through the
14855 // list backward, but if one of the operands has several users in
14856 // this cluster of to-be-promoted nodes, it is possible).
14857 PromOpHandles.emplace_front(PromOp);
14858 continue;
14859 }
14860
14861 // For SELECT and SELECT_CC nodes, we do a similar check for any
14862 // to-be-promoted comparison inputs.
14863 if (PromOp.getOpcode() == ISD::SELECT ||
14864 PromOp.getOpcode() == ISD::SELECT_CC) {
14865 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14866 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14867 (SelectTruncOp[1].count(PromOp.getNode()) &&
14868 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14869 PromOpHandles.emplace_front(PromOp);
14870 continue;
14871 }
14872 }
14873
14874 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14875 PromOp.getNode()->op_end());
14876
14877 // If this node has constant inputs, then they'll need to be promoted here.
14878 for (unsigned i = 0; i < 2; ++i) {
14879 if (!isa<ConstantSDNode>(Ops[C+i]))
14880 continue;
14881 if (Ops[C+i].getValueType() == N->getValueType(0))
14882 continue;
14883
14884 if (N->getOpcode() == ISD::SIGN_EXTEND)
14885 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14886 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14887 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14888 else
14889 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14890 }
14891
14892 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14893 // truncate them again to the original value type.
14894 if (PromOp.getOpcode() == ISD::SELECT ||
14895 PromOp.getOpcode() == ISD::SELECT_CC) {
14896 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14897 if (SI0 != SelectTruncOp[0].end())
14898 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14899 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14900 if (SI1 != SelectTruncOp[1].end())
14901 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14902 }
14903
14904 DAG.ReplaceAllUsesOfValueWith(PromOp,
14905 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14906 }
14907
14908 // Now we're left with the initial extension itself.
14909 if (!ReallyNeedsExt)
14910 return N->getOperand(0);
14911
14912 // To zero extend, just mask off everything except for the first bit (in the
14913 // i1 case).
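 // For example, when the promoted value is an i1, the mask below is simply 1;
 // in the sign-extend case further down, an i1 carried in an i64 is shifted
 // left by 63 and then arithmetically shifted right by 63 to replicate the
 // bit across the whole register.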
14914 if (N->getOpcode() == ISD::ZERO_EXTEND)
14915 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14916 DAG.getConstant(APInt::getLowBitsSet(
14917 N->getValueSizeInBits(0), PromBits),
14918 dl, N->getValueType(0)));
14919
14920 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14921 "Invalid extension type");
14922 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14923 SDValue ShiftCst =
14924 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14925 return DAG.getNode(
14926 ISD::SRA, dl, N->getValueType(0),
14927 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14928 ShiftCst);
14929}
14930
14931SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14932 DAGCombinerInfo &DCI) const {
14933 assert(N->getOpcode() == ISD::SETCC &&
14934 "Should be called with a SETCC node");
14935
14936 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14937 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14938 SDValue LHS = N->getOperand(0);
14939 SDValue RHS = N->getOperand(1);
14940
14941 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14942 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14943 LHS.hasOneUse())
14944 std::swap(LHS, RHS);
14945
14946 // x == 0-y --> x+y == 0
14947 // x != 0-y --> x+y != 0
14948 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14949 RHS.hasOneUse()) {
14950 SDLoc DL(N);
14951 SelectionDAG &DAG = DCI.DAG;
14952 EVT VT = N->getValueType(0);
14953 EVT OpVT = LHS.getValueType();
14954 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14955 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14956 }
14957 }
14958
14959 return DAGCombineTruncBoolExt(N, DCI);
14960}
14961
14962// Is this an extending load from an f32 to an f64?
14963static bool isFPExtLoad(SDValue Op) {
14964 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14965 return LD->getExtensionType() == ISD::EXTLOAD &&
14966 Op.getValueType() == MVT::f64;
14967 return false;
14968}
14969
14970/// Reduces the number of fp-to-int conversion when building a vector.
14971///
14972/// If this vector is built out of floating to integer conversions,
14973/// transform it to a vector built out of floating point values followed by a
14974/// single floating to integer conversion of the vector.
14975/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14976/// becomes (fptosi (build_vector ($A, $B, ...)))
14977SDValue PPCTargetLowering::
14978combineElementTruncationToVectorTruncation(SDNode *N,
14979 DAGCombinerInfo &DCI) const {
14980 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14981 "Should be called with a BUILD_VECTOR node");
14982
14983 SelectionDAG &DAG = DCI.DAG;
14984 SDLoc dl(N);
14985
14986 SDValue FirstInput = N->getOperand(0);
14987 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14988 "The input operand must be an fp-to-int conversion.");
14989
14990 // This combine happens after legalization so the fp_to_[su]i nodes are
14991 // already converted to PPCISD nodes.
14992 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14993 if (FirstConversion == PPCISD::FCTIDZ ||
14994 FirstConversion == PPCISD::FCTIDUZ ||
14995 FirstConversion == PPCISD::FCTIWZ ||
14996 FirstConversion == PPCISD::FCTIWUZ) {
14997 bool IsSplat = true;
14998 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14999 FirstConversion == PPCISD::FCTIWUZ;
15000 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15001 SmallVector<SDValue, 4> Ops;
15002 EVT TargetVT = N->getValueType(0);
15003 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15004 SDValue NextOp = N->getOperand(i);
15005 if (NextOp.getOpcode() != PPCISD::MFVSR)
15006 return SDValue();
15007 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15008 if (NextConversion != FirstConversion)
15009 return SDValue();
15010 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15011 // This is not valid if the input was originally double precision. It is
15012 // also not profitable to do unless this is an extending load in which
15013 // case doing this combine will allow us to combine consecutive loads.
15014 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
15015 return SDValue();
15016 if (N->getOperand(i) != FirstInput)
15017 IsSplat = false;
15018 }
15019
15020 // If this is a splat, we leave it as-is since there will be only a single
15021 // fp-to-int conversion followed by a splat of the integer. This is better
15022 // for 32-bit and smaller ints and neutral for 64-bit ints.
15023 if (IsSplat)
15024 return SDValue();
15025
15026 // Now that we know we have the right type of node, get its operands
15027 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15028 SDValue In = N->getOperand(i).getOperand(0);
15029 if (Is32Bit) {
15030 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15031 // here, we know that all inputs are extending loads so this is safe).
15032 if (In.isUndef())
15033 Ops.push_back(DAG.getUNDEF(SrcVT));
15034 else {
15035 SDValue Trunc =
15036 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15037 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15038 Ops.push_back(Trunc);
15039 }
15040 } else
15041 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15042 }
15043
15044 unsigned Opcode;
15045 if (FirstConversion == PPCISD::FCTIDZ ||
15046 FirstConversion == PPCISD::FCTIWZ)
15047 Opcode = ISD::FP_TO_SINT;
15048 else
15049 Opcode = ISD::FP_TO_UINT;
15050
15051 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15052 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15053 return DAG.getNode(Opcode, dl, TargetVT, BV);
15054 }
15055 return SDValue();
15056}
15057
15058/// Reduce the number of loads when building a vector.
15059///
15060/// Building a vector out of multiple loads can be converted to a load
15061/// of the vector type if the loads are consecutive. If the loads are
15062/// consecutive but in descending order, a shuffle is added at the end
15063/// to reorder the vector.
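///
/// For example, a v4i32 built from four i32 loads at A, A+4, A+8 and A+12
/// becomes a single v4i32 load at A; if the elements instead come from
/// A+12, A+8, A+4 and A, the wide load is taken at A and followed by a
/// <3,2,1,0> shuffle.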
15064 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
15065 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15066 "Should be called with a BUILD_VECTOR node");
15067
15068 SDLoc dl(N);
15069
15070 // Return early for non byte-sized types, as they can't be consecutive.
15071 if (!N->getValueType(0).getVectorElementType().isByteSized())
15072 return SDValue();
15073
15074 bool InputsAreConsecutiveLoads = true;
15075 bool InputsAreReverseConsecutive = true;
15076 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
15077 SDValue FirstInput = N->getOperand(0);
15078 bool IsRoundOfExtLoad = false;
15079 LoadSDNode *FirstLoad = nullptr;
15080
15081 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
15082 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
15083 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
15084 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
15085 }
15086 // Not a build vector of (possibly fp_rounded) loads.
15087 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
15088 N->getNumOperands() == 1)
15089 return SDValue();
15090
15091 if (!IsRoundOfExtLoad)
15092 FirstLoad = cast<LoadSDNode>(FirstInput);
15093
15094 SmallVector<LoadSDNode *, 4> InputLoads;
15095 InputLoads.push_back(FirstLoad);
15096 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
15097 // If any inputs are fp_round(extload), they all must be.
15098 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
15099 return SDValue();
15100
15101 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
15102 N->getOperand(i);
15103 if (NextInput.getOpcode() != ISD::LOAD)
15104 return SDValue();
15105
15106 SDValue PreviousInput =
15107 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
15108 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
15109 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
15110
15111 // If any inputs are fp_round(extload), they all must be.
15112 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
15113 return SDValue();
15114
15115 // We only care about regular loads. The PPC-specific load intrinsics
15116 // will not lead to a merge opportunity.
15117 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
15118 InputsAreConsecutiveLoads = false;
15119 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
15120 InputsAreReverseConsecutive = false;
15121
15122 // Exit early if the loads are neither consecutive nor reverse consecutive.
15123 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
15124 return SDValue();
15125 InputLoads.push_back(LD2);
15126 }
15127
15128 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
15129 "The loads cannot be both consecutive and reverse consecutive.");
15130
15131 SDValue WideLoad;
15132 SDValue ReturnSDVal;
15133 if (InputsAreConsecutiveLoads) {
15134 assert(FirstLoad && "Input needs to be a LoadSDNode.");
15135 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
15136 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15137 FirstLoad->getAlign());
15138 ReturnSDVal = WideLoad;
15139 } else if (InputsAreReverseConsecutive) {
15140 LoadSDNode *LastLoad = InputLoads.back();
15141 assert(LastLoad && "Input needs to be a LoadSDNode.");
15142 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
15143 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
15144 LastLoad->getAlign());
15145 SmallVector<int, 16> Ops;
15146 for (int i = N->getNumOperands() - 1; i >= 0; i--)
15147 Ops.push_back(i);
15148
15149 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
15150 DAG.getUNDEF(N->getValueType(0)), Ops);
15151 } else
15152 return SDValue();
15153
15154 for (auto *LD : InputLoads)
15155 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
15156 return ReturnSDVal;
15157}
15158
15159// This function adds the required vector_shuffle needed to get
15160// the elements of the vector extract in the correct position
15161// as specified by the CorrectElems encoding.
15162 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
15163 SDValue Input, uint64_t Elems,
15164 uint64_t CorrectElems) {
15165 SDLoc dl(N);
15166
15167 unsigned NumElems = Input.getValueType().getVectorNumElements();
15168 SmallVector<int, 16> ShuffleMask(NumElems, -1);
15169
15170 // Knowing the element indices being extracted from the original
15171 // vector and the order in which they're being inserted, just put
15172 // them at element indices required for the instruction.
15173 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15174 if (DAG.getDataLayout().isLittleEndian())
15175 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
15176 else
15177 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
15178 CorrectElems = CorrectElems >> 8;
15179 Elems = Elems >> 8;
15180 }
15181
15182 SDValue Shuffle =
15183 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
15184 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
15185
15186 EVT VT = N->getValueType(0);
15187 SDValue Conv = DAG.getBitcast(VT, Shuffle);
15188
15189 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
15190 Input.getValueType().getVectorElementType(),
15191 VT.getVectorNumElements());
15192 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
15193 DAG.getValueType(ExtVT));
15194}
15195
15196// Look for build vector patterns where input operands come from sign
15197// extended vector_extract elements of specific indices. If the correct indices
15198// aren't used, add a vector shuffle to fix up the indices and create
15199// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
15200// during instruction selection.
15201 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
15202 // This array encodes the indices that the vector sign extend instructions
15203 // extract from when extending from one type to another for both BE and LE.
15204 // The right nibble of each byte corresponds to the LE indices,
15205 // and the left nibble of each byte corresponds to the BE indices.
15206 // For example: 0x3074B8FC byte->word
15207 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
15208 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
15209 // For example: 0x000070F8 byte->double word
15210 // For LE: the allowed indices are: 0x0,0x8
15211 // For BE: the allowed indices are: 0x7,0xF
15212 uint64_t TargetElems[] = {
15213 0x3074B8FC, // b->w
15214 0x000070F8, // b->d
15215 0x10325476, // h->w
15216 0x00003074, // h->d
15217 0x00001032, // w->d
15218 };
15219
15220 uint64_t Elems = 0;
15221 int Index;
15222 SDValue Input;
15223
15224 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
15225 if (!Op)
15226 return false;
15227 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
15228 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
15229 return false;
15230
15231 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
15232 // of the right width.
15233 SDValue Extract = Op.getOperand(0);
15234 if (Extract.getOpcode() == ISD::ANY_EXTEND)
15235 Extract = Extract.getOperand(0);
15236 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15237 return false;
15238
15239 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
15240 if (!ExtOp)
15241 return false;
15242
15243 Index = ExtOp->getZExtValue();
15244 if (Input && Input != Extract.getOperand(0))
15245 return false;
15246
15247 if (!Input)
15248 Input = Extract.getOperand(0);
15249
15250 Elems = Elems << 8;
15251 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
15252 Elems |= Index;
15253
15254 return true;
15255 };
15256
15257 // If the build vector operands aren't sign-extended vector extracts
15258 // of the same input vector, then return.
15259 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15260 if (!isSExtOfVecExtract(N->getOperand(i))) {
15261 return SDValue();
15262 }
15263 }
15264
15265 // If the vector extract indices are not correct, add the appropriate
15266 // vector_shuffle.
15267 int TgtElemArrayIdx;
15268 int InputSize = Input.getValueType().getScalarSizeInBits();
15269 int OutputSize = N->getValueType(0).getScalarSizeInBits();
15270 if (InputSize + OutputSize == 40)
15271 TgtElemArrayIdx = 0;
15272 else if (InputSize + OutputSize == 72)
15273 TgtElemArrayIdx = 1;
15274 else if (InputSize + OutputSize == 48)
15275 TgtElemArrayIdx = 2;
15276 else if (InputSize + OutputSize == 80)
15277 TgtElemArrayIdx = 3;
15278 else if (InputSize + OutputSize == 96)
15279 TgtElemArrayIdx = 4;
15280 else
15281 return SDValue();
15282
15283 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
15284 CorrectElems = DAG.getDataLayout().isLittleEndian()
15285 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
15286 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
15287 if (Elems != CorrectElems) {
15288 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
15289 }
15290
15291 // Regular lowering will catch cases where a shuffle is not needed.
15292 return SDValue();
15293}
15294
15295// Look for the pattern of a load from a narrow width to i128, feeding
15296// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
15297// (LXVRZX). This node represents a zero extending load that will be matched
15298// to the Load VSX Vector Rightmost instructions.
15299 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
15300 SDLoc DL(N);
15301
15302 // This combine is only eligible for a BUILD_VECTOR of v1i128.
15303 if (N->getValueType(0) != MVT::v1i128)
15304 return SDValue();
15305
15306 SDValue Operand = N->getOperand(0);
15307 // Proceed with the transformation if the operand to the BUILD_VECTOR
15308 // is a load instruction.
15309 if (Operand.getOpcode() != ISD::LOAD)
15310 return SDValue();
15311
15312 auto *LD = cast<LoadSDNode>(Operand);
15313 EVT MemoryType = LD->getMemoryVT();
15314
15315 // This transformation is only valid if we are loading either a byte,
15316 // halfword, word, or doubleword.
15317 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15318 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15319
15320 // Ensure that the load from the narrow width is being zero extended to i128.
15321 if (!ValidLDType ||
15322 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15323 LD->getExtensionType() != ISD::EXTLOAD))
15324 return SDValue();
15325
15326 SDValue LoadOps[] = {
15327 LD->getChain(), LD->getBasePtr(),
15328 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15329
15330 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15331 DAG.getVTList(MVT::v1i128, MVT::Other),
15332 LoadOps, MemoryType, LD->getMemOperand());
15333}
15334
15335SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15336 DAGCombinerInfo &DCI) const {
15337 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15338 "Should be called with a BUILD_VECTOR node");
15339
15340 SelectionDAG &DAG = DCI.DAG;
15341 SDLoc dl(N);
15342
15343 if (!Subtarget.hasVSX())
15344 return SDValue();
15345
15346 // The target independent DAG combiner will leave a build_vector of
15347 // float-to-int conversions intact. We can generate MUCH better code for
15348 // a float-to-int conversion of a vector of floats.
15349 SDValue FirstInput = N->getOperand(0);
15350 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15351 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15352 if (Reduced)
15353 return Reduced;
15354 }
15355
15356 // If we're building a vector out of consecutive loads, just load that
15357 // vector type.
15358 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15359 if (Reduced)
15360 return Reduced;
15361
15362 // If we're building a vector out of extended elements from another vector
15363 // we have P9 vector integer extend instructions. The code assumes legal
15364 // input types (i.e. it can't handle things like v4i16) so do not run before
15365 // legalization.
15366 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15367 Reduced = combineBVOfVecSExt(N, DAG);
15368 if (Reduced)
15369 return Reduced;
15370 }
15371
15372 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15373 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15374 // is a load from <valid narrow width> to i128.
15375 if (Subtarget.isISA3_1()) {
15376 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15377 if (BVOfZLoad)
15378 return BVOfZLoad;
15379 }
15380
15381 if (N->getValueType(0) != MVT::v2f64)
15382 return SDValue();
15383
15384 // Looking for:
15385 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15386 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15387 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15388 return SDValue();
15389 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15390 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15391 return SDValue();
15392 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15393 return SDValue();
15394
15395 SDValue Ext1 = FirstInput.getOperand(0);
15396 SDValue Ext2 = N->getOperand(1).getOperand(0);
15397 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15398 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15399 return SDValue();
15400
15401 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
15402 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
15403 if (!Ext1Op || !Ext2Op)
15404 return SDValue();
15405 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
15406 Ext1.getOperand(0) != Ext2.getOperand(0))
15407 return SDValue();
15408
15409 int FirstElem = Ext1Op->getZExtValue();
15410 int SecondElem = Ext2Op->getZExtValue();
15411 int SubvecIdx;
15412 if (FirstElem == 0 && SecondElem == 1)
15413 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15414 else if (FirstElem == 2 && SecondElem == 3)
15415 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15416 else
15417 return SDValue();
15418
15419 SDValue SrcVec = Ext1.getOperand(0);
15420 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15421 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15422 return DAG.getNode(NodeType, dl, MVT::v2f64,
15423 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15424}
15425
15426SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15427 DAGCombinerInfo &DCI) const {
15428 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15429 N->getOpcode() == ISD::UINT_TO_FP) &&
15430 "Need an int -> FP conversion node here");
15431
15432 if (useSoftFloat() || !Subtarget.has64BitSupport())
15433 return SDValue();
15434
15435 SelectionDAG &DAG = DCI.DAG;
15436 SDLoc dl(N);
15437 SDValue Op(N, 0);
15438
15439 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15440 // from the hardware.
15441 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15442 return SDValue();
15443 if (!Op.getOperand(0).getValueType().isSimple())
15444 return SDValue();
15445 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15446 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15447 return SDValue();
15448
15449 SDValue FirstOperand(Op.getOperand(0));
15450 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15451 (FirstOperand.getValueType() == MVT::i8 ||
15452 FirstOperand.getValueType() == MVT::i16);
15453 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15454 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15455 bool DstDouble = Op.getValueType() == MVT::f64;
15456 unsigned ConvOp = Signed ?
15457 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15458 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15459 SDValue WidthConst =
15460 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15461 dl, false);
15462 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15463 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15464 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
15465 DAG.getVTList(MVT::f64, MVT::Other),
15466 Ops, MVT::i8, LDN->getMemOperand());
15467 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15468
15469 // For signed conversion, we need to sign-extend the value in the VSR
15470 if (Signed) {
15471 SDValue ExtOps[] = { Ld, WidthConst };
15472 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15473 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15474 } else
15475 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15476 }
15477
15478
15479 // For i32 intermediate values, unfortunately, the conversion functions
15480 // leave the upper 32 bits of the value undefined. Within the set of
15481 // scalar instructions, we have no method for zero- or sign-extending the
15482 // value. Thus, we cannot handle i32 intermediate values here.
15483 if (Op.getOperand(0).getValueType() == MVT::i32)
15484 return SDValue();
15485
15486 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15487 "UINT_TO_FP is supported only with FPCVT");
15488
15489 // If we have FCFIDS, then use it when converting to single-precision.
15490 // Otherwise, convert to double-precision and then round.
15491 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15492 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15493 : PPCISD::FCFIDS)
15494 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15495 : PPCISD::FCFID);
15496 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15497 ? MVT::f32
15498 : MVT::f64;
15499
15500 // If we're converting from a float, to an int, and back to a float again,
15501 // then we don't need the store/load pair at all.
15502 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15503 Subtarget.hasFPCVT()) ||
15504 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15505 SDValue Src = Op.getOperand(0).getOperand(0);
15506 if (Src.getValueType() == MVT::f32) {
15507 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15508 DCI.AddToWorklist(Src.getNode());
15509 } else if (Src.getValueType() != MVT::f64) {
15510 // Make sure that we don't pick up a ppc_fp128 source value.
15511 return SDValue();
15512 }
15513
15514 unsigned FCTOp =
15515 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15516 PPCISD::FCTIDUZ;
15517
15518 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15519 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15520
15521 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15522 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15523 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15524 DCI.AddToWorklist(FP.getNode());
15525 }
15526
15527 return FP;
15528 }
15529
15530 return SDValue();
15531}
15532
15533// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15534// builtins) into loads with swaps.
15535 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15536 DAGCombinerInfo &DCI) const {
15537 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15538 // load combines.
15539 if (DCI.isBeforeLegalizeOps())
15540 return SDValue();
15541
15542 SelectionDAG &DAG = DCI.DAG;
15543 SDLoc dl(N);
15544 SDValue Chain;
15545 SDValue Base;
15546 MachineMemOperand *MMO;
15547
15548 switch (N->getOpcode()) {
15549 default:
15550 llvm_unreachable("Unexpected opcode for little endian VSX load");
15551 case ISD::LOAD: {
15552 LoadSDNode *LD = cast<LoadSDNode>(N);
15553 Chain = LD->getChain();
15554 Base = LD->getBasePtr();
15555 MMO = LD->getMemOperand();
15556 // If the MMO suggests this isn't a load of a full vector, leave
15557 // things alone. For a built-in, we have to make the change for
15558 // correctness, so if there is a size problem that will be a bug.
15559 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15560 return SDValue();
15561 break;
15562 }
15563 case ISD::INTRINSIC_W_CHAIN: {
15564 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15565 Chain = Intrin->getChain();
15566 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15567 // us what we want. Get operand 2 instead.
15568 Base = Intrin->getOperand(2);
15569 MMO = Intrin->getMemOperand();
15570 break;
15571 }
15572 }
15573
15574 MVT VecTy = N->getValueType(0).getSimpleVT();
15575
15576 SDValue LoadOps[] = { Chain, Base };
15577 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15578 DAG.getVTList(MVT::v2f64, MVT::Other),
15579 LoadOps, MVT::v2f64, MMO);
15580
15581 DCI.AddToWorklist(Load.getNode());
15582 Chain = Load.getValue(1);
15583 SDValue Swap = DAG.getNode(
15584 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15585 DCI.AddToWorklist(Swap.getNode());
15586
15587 // Add a bitcast if the resulting load type doesn't match v2f64.
15588 if (VecTy != MVT::v2f64) {
15589 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15590 DCI.AddToWorklist(N.getNode());
15591 // Package {bitcast value, swap's chain} to match Load's shape.
15592 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15593 N, Swap.getValue(1));
15594 }
15595
15596 return Swap;
15597}
15598
15599// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15600// builtins) into stores with swaps.
15601 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15602 DAGCombinerInfo &DCI) const {
15603 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15604 // store combines.
15605 if (DCI.isBeforeLegalizeOps())
15606 return SDValue();
15607
15608 SelectionDAG &DAG = DCI.DAG;
15609 SDLoc dl(N);
15610 SDValue Chain;
15611 SDValue Base;
15612 unsigned SrcOpnd;
15613 MachineMemOperand *MMO;
15614
15615 switch (N->getOpcode()) {
15616 default:
15617 llvm_unreachable("Unexpected opcode for little endian VSX store");
15618 case ISD::STORE: {
15619 StoreSDNode *ST = cast<StoreSDNode>(N);
15620 Chain = ST->getChain();
15621 Base = ST->getBasePtr();
15622 MMO = ST->getMemOperand();
15623 SrcOpnd = 1;
15624 // If the MMO suggests this isn't a store of a full vector, leave
15625 // things alone. For a built-in, we have to make the change for
15626 // correctness, so if there is a size problem that will be a bug.
15627 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15628 return SDValue();
15629 break;
15630 }
15631 case ISD::INTRINSIC_VOID: {
15632 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15633 Chain = Intrin->getChain();
15634 // Intrin->getBasePtr() oddly does not get what we want.
15635 Base = Intrin->getOperand(3);
15636 MMO = Intrin->getMemOperand();
15637 SrcOpnd = 2;
15638 break;
15639 }
15640 }
15641
15642 SDValue Src = N->getOperand(SrcOpnd);
15643 MVT VecTy = Src.getValueType().getSimpleVT();
15644
15645 // All stores are done as v2f64, with a bitcast inserted if needed.
15646 if (VecTy != MVT::v2f64) {
15647 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15648 DCI.AddToWorklist(Src.getNode());
15649 }
15650
15651 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15652 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15653 DCI.AddToWorklist(Swap.getNode());
15654 Chain = Swap.getValue(1);
15655 SDValue StoreOps[] = { Chain, Swap, Base };
15656 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15657 DAG.getVTList(MVT::Other),
15658 StoreOps, VecTy, MMO);
15659 DCI.AddToWorklist(Store.getNode());
15660 return Store;
15661}
15662
15663// Handle DAG combine for STORE (FP_TO_INT F).
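// For example, (store (i32 (fp_to_sint f64 %f)), %ptr) is rewritten into a
// target store intrinsic that writes the 4-byte conversion result directly
// from a VSR, avoiding a move through a GPR.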
15664SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15665 DAGCombinerInfo &DCI) const {
15666 SelectionDAG &DAG = DCI.DAG;
15667 SDLoc dl(N);
15668 unsigned Opcode = N->getOperand(1).getOpcode();
15669 (void)Opcode;
15670 bool Strict = N->getOperand(1)->isStrictFPOpcode();
15671
15672 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15673 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15674 && "Not a FP_TO_INT Instruction!");
15675
15676 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15677 EVT Op1VT = N->getOperand(1).getValueType();
15678 EVT ResVT = Val.getValueType();
15679
15680 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15681 return SDValue();
15682
15683 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15684 bool ValidTypeForStoreFltAsInt =
15685 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15686 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15687
15688 // TODO: Lower conversion from f128 on all VSX targets
15689 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15690 return SDValue();
15691
15692 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15693 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15694 return SDValue();
15695
15696 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15697
15698 // Set number of bytes being converted.
15699 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15700 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15701 DAG.getIntPtrConstant(ByteSize, dl, false),
15702 DAG.getValueType(Op1VT)};
15703
15704 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15705 DAG.getVTList(MVT::Other), Ops,
15706 cast<StoreSDNode>(N)->getMemoryVT(),
15707 cast<StoreSDNode>(N)->getMemOperand());
15708
15709 return Val;
15710}
15711
15712static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15713 // Check that the source of the element keeps flipping
15714 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
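// For example, with NumElts == 4 the mask <0, 5, 2, 7> alternates and is
// accepted, while <0, 1, 6, 7> is rejected because two consecutive elements
// come from the same source vector.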
15715 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15716 for (int i = 1, e = Mask.size(); i < e; i++) {
15717 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15718 return false;
15719 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15720 return false;
15721 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15722 }
15723 return true;
15724}
15725
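// For example, (build_vector %c, %c, undef, %c) is treated as a splat of %c
// by the helper below, while (build_vector %c, %d, %c, %d) is not.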
15726static bool isSplatBV(SDValue Op) {
15727 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15728 return false;
15729 SDValue FirstOp;
15730
15731 // Find first non-undef input.
15732 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15733 FirstOp = Op.getOperand(i);
15734 if (!FirstOp.isUndef())
15735 break;
15736 }
15737
15738 // All inputs are undef or the same as the first non-undef input.
15739 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15740 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15741 return false;
15742 return true;
15743}
15744
15745 static SDValue isScalarToVec(SDValue Op) {
15746 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15747 return Op;
15748 if (Op.getOpcode() != ISD::BITCAST)
15749 return SDValue();
15750 Op = Op.getOperand(0);
15751 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15752 return Op;
15753 return SDValue();
15754}
15755
15756// Fix up the shuffle mask to account for the fact that the result of
15757// scalar_to_vector is not in lane zero. This just takes all values in
15758// the ranges specified by the min/max indices and adds the number of
15759// elements required to ensure each element comes from the respective
15760// position in the valid lane.
15761// On little endian, that's just the corresponding element in the other
15762// half of the vector. On big endian, it is in the same half but right
15763// justified rather than left justified in that half.
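// For example, for a v4i32 shuffle (HalfVec == 2) whose LHS came from a
// scalar_to_vector of an i32 (a single valid element), a mask index of 0 is
// rewritten to 2 on little endian and to 1 on big endian.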
15764 static void fixupShuffleMaskForPermutedSToV(
15765 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15766 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15767 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15768 int LHSEltFixup =
15769 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15770 int RHSEltFixup =
15771 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15772 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
15773 int Idx = ShuffV[I];
15774 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15775 ShuffV[I] += LHSEltFixup;
15776 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15777 ShuffV[I] += RHSEltFixup;
15778 }
15779}
15780
15781// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15782// the original is:
15783// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15784// In such a case, just change the shuffle mask to extract the element
15785// from the permuted index.
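// For example, (v4i32 (scalar_to_vector (i32 (extract_elt v4i32 %a, 3))))
// becomes vector_shuffle<-1,-1,3,-1> %a, %a on little endian and
// vector_shuffle<-1,3,-1,-1> %a, %a on big endian.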
15786 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15787 const PPCSubtarget &Subtarget) {
15788 SDLoc dl(OrigSToV);
15789 EVT VT = OrigSToV.getValueType();
15790 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15791 "Expecting a SCALAR_TO_VECTOR here");
15792 SDValue Input = OrigSToV.getOperand(0);
15793
15794 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15795 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15796 SDValue OrigVector = Input.getOperand(0);
15797
15798 // Can't handle non-const element indices or different vector types
15799 // for the input to the extract and the output of the scalar_to_vector.
15800 if (Idx && VT == OrigVector.getValueType()) {
15801 unsigned NumElts = VT.getVectorNumElements();
15802 assert(
15803 NumElts > 1 &&
15804 "Cannot produce a permuted scalar_to_vector for one element vector");
15805 SmallVector<int, 16> NewMask(NumElts, -1);
15806 unsigned ResultInElt = NumElts / 2;
15807 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15808 NewMask[ResultInElt] = Idx->getZExtValue();
15809 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15810 }
15811 }
15812 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15813 OrigSToV.getOperand(0));
15814}
15815
15816 static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15817 int HalfVec, int LHSLastElementDefined,
15818 int RHSLastElementDefined) {
15819 for (int Index : ShuffV) {
15820 if (Index < 0) // Skip explicitly undefined mask indices.
15821 continue;
15822 // Handle first input vector of the vector_shuffle.
15823 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15824 (Index > LHSLastElementDefined))
15825 return false;
15826 // Handle second input vector of the vector_shuffle.
15827 if ((RHSLastElementDefined >= 0) &&
15828 (Index > HalfVec + RHSLastElementDefined))
15829 return false;
15830 }
15831 return true;
15832}
15833
15834 static SDValue generateSToVPermutedForVecShuffle(
15835 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15836 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15837 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15838 EVT VecShuffOperandType = VecShuffOperand.getValueType();
15839 // Set up the values for the shuffle vector fixup.
15840 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15841 // The last element depends on if the input comes from the LHS or RHS.
15842 //
15843 // For example:
15844 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15845 //
15846 // For the LHS: The last element that comes from the LHS is actually 0, not 3
15847 // because elements 1 and higher of a scalar_to_vector are undefined.
15848 // For the RHS: The last element that comes from the RHS is actually 5, not 7
15849 // because elements 1 and higher of a scalar_to_vector are undefined.
15850 // It is also not 4 because the original scalar_to_vector is wider and
15851 // actually contains two i32 elements.
15852 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
15853 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
15854 : FirstElt;
15855 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
15856 if (SToVPermuted.getValueType() != VecShuffOperandType)
15857 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15858 return SToVPermuted;
15859}
15860
15861// On little endian subtargets, combine shuffles such as:
15862// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15863// into:
15864// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15865// because the latter can be matched to a single instruction merge.
15866// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15867// to put the value into element zero. Adjust the shuffle mask so that the
15868// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15869// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15870// nodes with elements smaller than doubleword because all the ways
15871// of getting scalar data into a vector register put the value in the
15872// rightmost element of the left half of the vector.
15873SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15874 SelectionDAG &DAG) const {
15875 SDValue LHS = SVN->getOperand(0);
15876 SDValue RHS = SVN->getOperand(1);
15877 auto Mask = SVN->getMask();
15878 int NumElts = LHS.getValueType().getVectorNumElements();
15879 SDValue Res(SVN, 0);
15880 SDLoc dl(SVN);
15881 bool IsLittleEndian = Subtarget.isLittleEndian();
15882
15883 // On big endian targets this is only useful for subtargets with direct moves.
15884 // On little endian targets it would be useful for all subtargets with VSX.
15885 // However adding special handling for LE subtargets without direct moves
15886 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15887 // which includes direct moves.
15888 if (!Subtarget.hasDirectMove())
15889 return Res;
15890
15891 // If this is not a shuffle of a shuffle and the first element comes from
15892 // the second vector, canonicalize to the commuted form. This will make it
15893 // more likely to match one of the single instruction patterns.
15894 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15895 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15896 std::swap(LHS, RHS);
15897 Res = DAG.getCommutedVectorShuffle(*SVN);
15898 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15899 }
15900
15901 // Adjust the shuffle mask if either input vector comes from a
15902 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15903 // form (to prevent the need for a swap).
15904 SmallVector<int, 16> ShuffV(Mask);
15905 SDValue SToVLHS = isScalarToVec(LHS);
15906 SDValue SToVRHS = isScalarToVec(RHS);
15907 if (SToVLHS || SToVRHS) {
15908 EVT VT = SVN->getValueType(0);
15909 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15910 int ShuffleNumElts = ShuffV.size();
15911 int HalfVec = ShuffleNumElts / 2;
15912 // The width of the "valid lane" (i.e. the lane that contains the value that
15913 // is vectorized) needs to be expressed in terms of the number of elements
15914 // of the shuffle. It is thereby the ratio of the values before and after
15915 // any bitcast, which will be set later on if the LHS or RHS are
15916 // SCALAR_TO_VECTOR nodes.
15917 unsigned LHSNumValidElts = HalfVec;
15918 unsigned RHSNumValidElts = HalfVec;
15919
15920 // Initially assume that neither input is permuted. These will be adjusted
15921 // accordingly if either input is. Note that -1 means that all elements
15922 // are undefined.
15923 int LHSFirstElt = 0;
15924 int RHSFirstElt = ShuffleNumElts;
15925 int LHSLastElt = -1;
15926 int RHSLastElt = -1;
15927
15928 // Get the permuted scalar to vector nodes for the source(s) that come from
15929 // ISD::SCALAR_TO_VECTOR.
15930 // On big endian systems, this only makes sense for element sizes smaller
15931 // than 64 bits since for 64-bit elements, all instructions already put
15932 // the value into element zero. Since scalar size of LHS and RHS may differ
15933 // after isScalarToVec, this should be checked using their own sizes.
15934 int LHSScalarSize = 0;
15935 int RHSScalarSize = 0;
15936 if (SToVLHS) {
15937 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15938 if (!IsLittleEndian && LHSScalarSize >= 64)
15939 return Res;
15940 }
15941 if (SToVRHS) {
15942 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15943 if (!IsLittleEndian && RHSScalarSize >= 64)
15944 return Res;
15945 }
15946 if (LHSScalarSize != 0)
15947 LHS = generateSToVPermutedForVecShuffle(
15948 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15949 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
15950 if (RHSScalarSize != 0)
15951 RHS = generateSToVPermutedForVecShuffle(
15952 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15953 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
15954
15955 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15956 return Res;
15957
15958 // Fix up the shuffle mask to reflect where the desired element actually is.
15959 // The minimum and maximum indices that correspond to element zero for both
15960 // the LHS and RHS are computed and will control which shuffle mask entries
15961 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15962 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15963 fixupShuffleMaskForPermutedSToV(
15964 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15965 LHSNumValidElts, RHSNumValidElts, Subtarget);
15966 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15967
15968 // We may have simplified away the shuffle. We won't be able to do anything
15969 // further with it here.
15970 if (!isa<ShuffleVectorSDNode>(Res))
15971 return Res;
15972 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15973 }
15974
15975 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15976 // The common case after we commuted the shuffle is that the RHS is a splat
15977 // and we have elements coming in from the splat at indices that are not
15978 // conducive to using a merge.
15979 // Example:
15980 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15981 if (!isSplatBV(TheSplat))
15982 return Res;
15983
15984 // We are looking for a mask such that all even elements are from
15985 // one vector and all odd elements from the other.
15986 if (!isAlternatingShuffMask(Mask, NumElts))
15987 return Res;
15988
15989 // Adjust the mask so we are pulling in the same index from the splat
15990 // as the index from the interesting vector in consecutive elements.
15991 if (IsLittleEndian) {
15992 // Example (even elements from first vector):
15993 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15994 if (Mask[0] < NumElts)
15995 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15996 if (ShuffV[i] < 0)
15997 continue;
15998 // If element from non-splat is undef, pick first element from splat.
15999 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
16000 }
16001 // Example (odd elements from first vector):
16002 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
16003 else
16004 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16005 if (ShuffV[i] < 0)
16006 continue;
16007 // If element from non-splat is undef, pick first element from splat.
16008 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
16009 }
16010 } else {
16011 // Example (even elements from first vector):
16012 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
16013 if (Mask[0] < NumElts)
16014 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16015 if (ShuffV[i] < 0)
16016 continue;
16017 // If element from non-splat is undef, pick first element from splat.
16018 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
16019 }
16020 // Example (odd elements from first vector):
16021 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
16022 else
16023 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16024 if (ShuffV[i] < 0)
16025 continue;
16026 // If element from non-splat is undef, pick first element from splat.
16027 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
16028 }
16029 }
16030
16031 // If the RHS has undefs, we need to remove them since we may have created
16032 // a shuffle that adds those instead of the splat value.
16033 SDValue SplatVal =
16034 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
16035 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
16036
16037 if (IsLittleEndian)
16038 RHS = TheSplat;
16039 else
16040 LHS = TheSplat;
16041 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16042}
16043
16044SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
16045 LSBaseSDNode *LSBase,
16046 DAGCombinerInfo &DCI) const {
16047 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
16048 "Not a reverse memop pattern!");
16049
16050 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
16051 auto Mask = SVN->getMask();
16052 int i = 0;
16053 auto I = Mask.rbegin();
16054 auto E = Mask.rend();
16055
16056 for (; I != E; ++I) {
16057 if (*I != i)
16058 return false;
16059 i++;
16060 }
16061 return true;
16062 };
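// For a v4i32 shuffle, IsElementReverse accepts exactly the mask
// <3, 2, 1, 0>, i.e. a full reversal of the elements.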
16063
16064 SelectionDAG &DAG = DCI.DAG;
16065 EVT VT = SVN->getValueType(0);
16066
16067 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
16068 return SDValue();
16069
16070 // Before P9, we rely on the PPCVSXSwapRemoval pass to fix up the element
16071 // order; see the comment in PPCVSXSwapRemoval.cpp.
16072 // This combine conflicts with that pass, so we don't do it here.
16073 if (!Subtarget.hasP9Vector())
16074 return SDValue();
16075
16076 if (!IsElementReverse(SVN))
16077 return SDValue();
16078
16079 if (LSBase->getOpcode() == ISD::LOAD) {
16080 // If result 0 of the load has any user other than this shufflevector
16081 // instruction, it is not profitable to replace the shufflevector with
16082 // a reverse load.
16083 for (SDUse &Use : LSBase->uses())
16084 if (Use.getResNo() == 0 &&
16085 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
16086 return SDValue();
16087
16088 SDLoc dl(LSBase);
16089 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
16090 return DAG.getMemIntrinsicNode(
16091 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
16092 LSBase->getMemoryVT(), LSBase->getMemOperand());
16093 }
16094
16095 if (LSBase->getOpcode() == ISD::STORE) {
16096 // If there are other uses of the shuffle, the swap cannot be avoided.
16097 // Forcing the use of an X-Form (since swapped stores only have
16098 // X-Forms) without removing the swap is unprofitable.
16099 if (!SVN->hasOneUse())
16100 return SDValue();
16101
16102 SDLoc dl(LSBase);
16103 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
16104 LSBase->getBasePtr()};
16105 return DAG.getMemIntrinsicNode(
16106 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
16107 LSBase->getMemoryVT(), LSBase->getMemOperand());
16108 }
16109
16110 llvm_unreachable("Expected a load or store node here");
16111}
16112
16113static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
16114 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
16115 if (IntrinsicID == Intrinsic::ppc_stdcx)
16116 StoreWidth = 8;
16117 else if (IntrinsicID == Intrinsic::ppc_stwcx)
16118 StoreWidth = 4;
16119 else if (IntrinsicID == Intrinsic::ppc_sthcx)
16120 StoreWidth = 2;
16121 else if (IntrinsicID == Intrinsic::ppc_stbcx)
16122 StoreWidth = 1;
16123 else
16124 return false;
16125 return true;
16126}
16127
16128 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
16129 DAGCombinerInfo &DCI) const {
16130 SelectionDAG &DAG = DCI.DAG;
16131 SDLoc dl(N);
16132 switch (N->getOpcode()) {
16133 default: break;
16134 case ISD::ADD:
16135 return combineADD(N, DCI);
16136 case ISD::AND: {
16137 // We don't want (and (zext (shift...)), C) if C fits in the width of the
16138 // original input as that will prevent us from selecting optimal rotates.
16139 // This only matters if the input to the extend is i32 widened to i64.
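// For example, (i64 (and (zext (srl i32 %x, 3)), 255)) is rewritten below to
// (i64 (zext (and (srl i32 %x, 3), 255))) so the 32-bit AND can still fold
// into a rotate-and-mask instruction.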
16140 SDValue Op1 = N->getOperand(0);
16141 SDValue Op2 = N->getOperand(1);
16142 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
16143 Op1.getOpcode() != ISD::ANY_EXTEND) ||
16144 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
16145 Op1.getOperand(0).getValueType() != MVT::i32)
16146 break;
16147 SDValue NarrowOp = Op1.getOperand(0);
16148 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
16149 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
16150 break;
16151
16152 uint64_t Imm = Op2->getAsZExtVal();
16153 // Make sure that the constant is narrow enough to fit in the narrow type.
16154 if (!isUInt<32>(Imm))
16155 break;
16156 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
16157 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
16158 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
16159 }
16160 case ISD::SHL:
16161 return combineSHL(N, DCI);
16162 case ISD::SRA:
16163 return combineSRA(N, DCI);
16164 case ISD::SRL:
16165 return combineSRL(N, DCI);
16166 case ISD::MUL:
16167 return combineMUL(N, DCI);
16168 case ISD::FMA:
16169 case PPCISD::FNMSUB:
16170 return combineFMALike(N, DCI);
16171 case PPCISD::SHL:
16172 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
16173 return N->getOperand(0);
16174 break;
16175 case PPCISD::SRL:
16176 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
16177 return N->getOperand(0);
16178 break;
16179 case PPCISD::SRA:
16180 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
16181 if (C->isZero() || // 0 >>s V -> 0.
16182 C->isAllOnes()) // -1 >>s V -> -1.
16183 return N->getOperand(0);
16184 }
16185 break;
16186 case ISD::SIGN_EXTEND:
16187 case ISD::ZERO_EXTEND:
16188 case ISD::ANY_EXTEND:
16189 return DAGCombineExtBoolTrunc(N, DCI);
16190 case ISD::TRUNCATE:
16191 return combineTRUNCATE(N, DCI);
16192 case ISD::SETCC:
16193 if (SDValue CSCC = combineSetCC(N, DCI))
16194 return CSCC;
16195 [[fallthrough]];
16196 case ISD::SELECT_CC:
16197 return DAGCombineTruncBoolExt(N, DCI);
16198 case ISD::SINT_TO_FP:
16199 case ISD::UINT_TO_FP:
16200 return combineFPToIntToFP(N, DCI);
16201 case ISD::VECTOR_SHUFFLE:
16202 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
16203 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
16204 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
16205 }
16206 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
16207 case ISD::STORE: {
16208
16209 EVT Op1VT = N->getOperand(1).getValueType();
16210 unsigned Opcode = N->getOperand(1).getOpcode();
16211
16212 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16213 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
16214 SDValue Val = combineStoreFPToInt(N, DCI);
16215 if (Val)
16216 return Val;
16217 }
16218
16219 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
16220 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
16221 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
16222 if (Val)
16223 return Val;
16224 }
16225
16226 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
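// For example, (store (i32 (bswap %x)), %ptr) becomes a single PPCISD::STBRX
// node, which selects to stwbrx.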
16227 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
16228 N->getOperand(1).getNode()->hasOneUse() &&
16229 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
16230 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
16231
16232 // STBRX can only handle simple types and it makes no sense to store fewer
16233 // than two bytes in byte-reversed order.
16234 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
16235 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
16236 break;
16237
16238 SDValue BSwapOp = N->getOperand(1).getOperand(0);
16239 // Do an any-extend to 32-bits if this is a half-word input.
16240 if (BSwapOp.getValueType() == MVT::i16)
16241 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
16242
16243 // If the type of the BSWAP operand is wider than the stored memory width,
16244 // it needs to be shifted right before the STBRX.
16245 if (Op1VT.bitsGT(mVT)) {
16246 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
16247 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
16248 DAG.getConstant(Shift, dl, MVT::i32));
16249 // Need to truncate if this is a bswap of i64 stored as i32/i16.
16250 if (Op1VT == MVT::i64)
16251 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
16252 }
16253
16254 SDValue Ops[] = {
16255 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
16256 };
16257 return
16258 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
16259 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
16260 cast<StoreSDNode>(N)->getMemOperand());
16261 }
16262
16263 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
16264 // So it can increase the chance of CSE constant construction.
16265 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
16266 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
16267 // Need to sign-extend to 64 bits to handle negative values.
16268 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
16269 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
16270 MemVT.getSizeInBits());
16271 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
16272
16273 // DAG.getTruncStore() can't be used here because it doesn't accept
16274 // the general (base + offset) addressing mode.
16275 // So we use UpdateNodeOperands and setTruncatingStore instead.
16276 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
16277 N->getOperand(3));
16278 cast<StoreSDNode>(N)->setTruncatingStore(true);
16279 return SDValue(N, 0);
16280 }
16281
16282 // For little endian, VSX stores require generating xxswapd/lxvd2x.
16283 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16284 if (Op1VT.isSimple()) {
16285 MVT StoreVT = Op1VT.getSimpleVT();
16286 if (Subtarget.needsSwapsForVSXMemOps() &&
16287 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
16288 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
16289 return expandVSXStoreForLE(N, DCI);
16290 }
16291 break;
16292 }
16293 case ISD::LOAD: {
16294 LoadSDNode *LD = cast<LoadSDNode>(N);
16295 EVT VT = LD->getValueType(0);
16296
16297 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16298 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16299 if (VT.isSimple()) {
16300 MVT LoadVT = VT.getSimpleVT();
16301 if (Subtarget.needsSwapsForVSXMemOps() &&
16302 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
16303 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
16304 return expandVSXLoadForLE(N, DCI);
16305 }
16306
16307 // We sometimes end up with a 64-bit integer load, from which we extract
16308 // two single-precision floating-point numbers. This happens with
16309 // std::complex<float>, and other similar structures, because of the way we
16310 // canonicalize structure copies. However, if we lack direct moves,
16311 // then the final bitcasts from the extracted integer values to the
16312 // floating-point numbers turn into store/load pairs. Even with direct moves,
16313 // just loading the two floating-point numbers is likely better.
16314 auto ReplaceTwoFloatLoad = [&]() {
16315 if (VT != MVT::i64)
16316 return false;
16317
16318 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
16319 LD->isVolatile())
16320 return false;
16321
16322 // We're looking for a sequence like this:
16323 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
16324 // t16: i64 = srl t13, Constant:i32<32>
16325 // t17: i32 = truncate t16
16326 // t18: f32 = bitcast t17
16327 // t19: i32 = truncate t13
16328 // t20: f32 = bitcast t19
16329
16330 if (!LD->hasNUsesOfValue(2, 0))
16331 return false;
16332
16333 auto UI = LD->user_begin();
16334 while (UI.getUse().getResNo() != 0) ++UI;
16335 SDNode *Trunc = *UI++;
16336 while (UI.getUse().getResNo() != 0) ++UI;
16337 SDNode *RightShift = *UI;
16338 if (Trunc->getOpcode() != ISD::TRUNCATE)
16339 std::swap(Trunc, RightShift);
16340
16341 if (Trunc->getOpcode() != ISD::TRUNCATE ||
16342 Trunc->getValueType(0) != MVT::i32 ||
16343 !Trunc->hasOneUse())
16344 return false;
16345 if (RightShift->getOpcode() != ISD::SRL ||
16346 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
16347 RightShift->getConstantOperandVal(1) != 32 ||
16348 !RightShift->hasOneUse())
16349 return false;
16350
16351 SDNode *Trunc2 = *RightShift->user_begin();
16352 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16353 Trunc2->getValueType(0) != MVT::i32 ||
16354 !Trunc2->hasOneUse())
16355 return false;
16356
16357 SDNode *Bitcast = *Trunc->user_begin();
16358 SDNode *Bitcast2 = *Trunc2->user_begin();
16359
16360 if (Bitcast->getOpcode() != ISD::BITCAST ||
16361 Bitcast->getValueType(0) != MVT::f32)
16362 return false;
16363 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16364 Bitcast2->getValueType(0) != MVT::f32)
16365 return false;
16366
16367 if (Subtarget.isLittleEndian())
16368 std::swap(Bitcast, Bitcast2);
16369
16370 // Bitcast has the second float (in memory-layout order) and Bitcast2
16371 // has the first one.
16372
16373 SDValue BasePtr = LD->getBasePtr();
16374 if (LD->isIndexed()) {
16375 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16376 "Non-pre-inc AM on PPC?");
16377 BasePtr =
16378 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16379 LD->getOffset());
16380 }
16381
16382 auto MMOFlags =
16383 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16384 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
16385 LD->getPointerInfo(), LD->getAlign(),
16386 MMOFlags, LD->getAAInfo());
16387 SDValue AddPtr =
16388 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
16389 BasePtr, DAG.getIntPtrConstant(4, dl));
16390 SDValue FloatLoad2 = DAG.getLoad(
16391 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
16392 LD->getPointerInfo().getWithOffset(4),
16393 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
16394
16395 if (LD->isIndexed()) {
16396 // Note that DAGCombine should re-form any pre-increment load(s) from
16397 // what is produced here if that makes sense.
16398 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
16399 }
16400
16401 DCI.CombineTo(Bitcast2, FloatLoad);
16402 DCI.CombineTo(Bitcast, FloatLoad2);
16403
16404 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
16405 SDValue(FloatLoad2.getNode(), 1));
16406 return true;
16407 };
16408
16409 if (ReplaceTwoFloatLoad())
16410 return SDValue(N, 0);
16411
16412 EVT MemVT = LD->getMemoryVT();
16413 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
16414 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16415 if (LD->isUnindexed() && VT.isVector() &&
16416 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16417 // P8 and later hardware should just use LOAD.
16418 !Subtarget.hasP8Vector() &&
16419 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16420 VT == MVT::v4f32))) &&
16421 LD->getAlign() < ABIAlignment) {
16422 // This is a type-legal unaligned Altivec load.
16423 SDValue Chain = LD->getChain();
16424 SDValue Ptr = LD->getBasePtr();
16425 bool isLittleEndian = Subtarget.isLittleEndian();
16426
16427 // This implements the loading of unaligned vectors as described in
16428 // the venerable Apple Velocity Engine overview. Specifically:
16429 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16430 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16431 //
16432 // The general idea is to expand a sequence of one or more unaligned
16433 // loads into an alignment-based permutation-control instruction (lvsl
16434 // or lvsr), a series of regular vector loads (which always truncate
16435 // their input address to an aligned address), and a series of
16436 // permutations. The results of these permutations are the requested
16437 // loaded values. The trick is that the last "extra" load is not taken
16438 // from the address you might suspect (sizeof(vector) bytes after the
16439 // last requested load), but rather sizeof(vector) - 1 bytes after the
16440 // last requested vector. The point of this is to avoid a page fault if
16441 // the base address happened to be aligned. This works because if the
16442 // base address is aligned, then adding less than a full vector length
16443 // will cause the last vector in the sequence to be (re)loaded.
16444 // Otherwise, the next vector will be fetched as you might suspect was
16445 // necessary.
16446
16447 // We might be able to reuse the permutation generation from
16448 // a different base address offset from this one by an aligned amount.
16449 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16450 // optimization later.
16451 Intrinsic::ID Intr, IntrLD, IntrPerm;
16452 MVT PermCntlTy, PermTy, LDTy;
16453 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16454 : Intrinsic::ppc_altivec_lvsl;
16455 IntrLD = Intrinsic::ppc_altivec_lvx;
16456 IntrPerm = Intrinsic::ppc_altivec_vperm;
16457 PermCntlTy = MVT::v16i8;
16458 PermTy = MVT::v4i32;
16459 LDTy = MVT::v4i32;
16460
16461 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16462
16463 // Create the new MMO for the new base load. It is like the original MMO,
16464 // but represents an area in memory almost twice the vector size centered
16465 // on the original address. If the address is unaligned, we might start
16466 // reading up to (sizeof(vector)-1) bytes below the address of the
16467 // original unaligned load.
16468 MachineFunction &MF = DAG.getMachineFunction();
16469 MachineMemOperand *BaseMMO =
16470 MF.getMachineMemOperand(LD->getMemOperand(),
16471 -(int64_t)MemVT.getStoreSize()+1,
16472 2*MemVT.getStoreSize()-1);
16473
16474 // Create the new base load.
16475 SDValue LDXIntID =
16476 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16477 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16478 SDValue BaseLoad =
16479 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16480 DAG.getVTList(PermTy, MVT::Other),
16481 BaseLoadOps, LDTy, BaseMMO);
16482
16483 // Note that the value of IncOffset (which is provided to the next
16484 // load's pointer info offset value, and thus used to calculate the
16485 // alignment), and the value of IncValue (which is actually used to
16486 // increment the pointer value) are different! This is because we
16487 // require the next load to appear to be aligned, even though it
16488 // is actually offset from the base pointer by a lesser amount.
16489 int IncOffset = VT.getSizeInBits() / 8;
16490 int IncValue = IncOffset;
16491
16492 // Walk (both up and down) the chain looking for another load at the real
16493 // (aligned) offset (the alignment of the other load does not matter in
16494 // this case). If found, then do not use the offset reduction trick, as
16495 // that will prevent the loads from being later combined (as they would
16496 // otherwise be duplicates).
16497 if (!findConsecutiveLoad(LD, DAG))
16498 --IncValue;
16499
16500 SDValue Increment =
16501 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16502 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16503
16504 MachineMemOperand *ExtraMMO =
16505 MF.getMachineMemOperand(LD->getMemOperand(),
16506 1, 2*MemVT.getStoreSize()-1);
16507 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16508 SDValue ExtraLoad =
16509 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16510 DAG.getVTList(PermTy, MVT::Other),
16511 ExtraLoadOps, LDTy, ExtraMMO);
16512
16513 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16514 BaseLoad.getValue(1), ExtraLoad.getValue(1));
16515
16516 // Because vperm has a big-endian bias, we must reverse the order
16517 // of the input vectors and complement the permute control vector
16518 // when generating little endian code. We have already handled the
16519 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16520 // and ExtraLoad here.
16521 SDValue Perm;
16522 if (isLittleEndian)
16523 Perm = BuildIntrinsicOp(IntrPerm,
16524 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16525 else
16526 Perm = BuildIntrinsicOp(IntrPerm,
16527 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16528
16529 if (VT != PermTy)
16530 Perm = Subtarget.hasAltivec()
16531 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16532 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16533 DAG.getTargetConstant(1, dl, MVT::i64));
16534 // second argument is 1 because this rounding
16535 // is always exact.
16536
16537 // The output of the permutation is our loaded result, the TokenFactor is
16538 // our new chain.
16539 DCI.CombineTo(N, Perm, TF);
16540 return SDValue(N, 0);
16541 }
16542 }
16543 break;
16544 case ISD::INTRINSIC_WO_CHAIN: {
16545 bool isLittleEndian = Subtarget.isLittleEndian();
16546 unsigned IID = N->getConstantOperandVal(0);
16547 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16548 : Intrinsic::ppc_altivec_lvsl);
16549 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16550 SDValue Add = N->getOperand(1);
16551
16552 int Bits = 4 /* 16 byte alignment */;
16553
16554 if (DAG.MaskedValueIsZero(Add->getOperand(1),
16555 APInt::getAllOnes(Bits /* alignment */)
16556 .zext(Add.getScalarValueSizeInBits()))) {
16557 SDNode *BasePtr = Add->getOperand(0).getNode();
16558 for (SDNode *U : BasePtr->users()) {
16559 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16560 U->getConstantOperandVal(0) == IID) {
16561 // We've found another LVSL/LVSR, and this address is an aligned
16562 // multiple of that one. The results will be the same, so use the
16563 // one we've just found instead.
16564
16565 return SDValue(U, 0);
16566 }
16567 }
16568 }
16569
16570 if (isa<ConstantSDNode>(Add->getOperand(1))) {
16571 SDNode *BasePtr = Add->getOperand(0).getNode();
16572 for (SDNode *U : BasePtr->users()) {
16573 if (U->getOpcode() == ISD::ADD &&
16574 isa<ConstantSDNode>(U->getOperand(1)) &&
16575 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16576 (1ULL << Bits) ==
16577 0) {
16578 SDNode *OtherAdd = U;
16579 for (SDNode *V : OtherAdd->users()) {
16580 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16581 V->getConstantOperandVal(0) == IID) {
16582 return SDValue(V, 0);
16583 }
16584 }
16585 }
16586 }
16587 }
16588 }
16589
16590 // Combine vmaxsw/h/b(a, negation of a) into abs(a) to expose the
16591 // vabsduw/h/b opportunity for downstream combines.
16592 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16593 (IID == Intrinsic::ppc_altivec_vmaxsw ||
16594 IID == Intrinsic::ppc_altivec_vmaxsh ||
16595 IID == Intrinsic::ppc_altivec_vmaxsb)) {
16596 SDValue V1 = N->getOperand(1);
16597 SDValue V2 = N->getOperand(2);
16598 if ((V1.getSimpleValueType() == MVT::v4i32 ||
16599 V1.getSimpleValueType() == MVT::v8i16 ||
16600 V1.getSimpleValueType() == MVT::v16i8) &&
16601 V1.getSimpleValueType() == V2.getSimpleValueType()) {
16602 // (0-a, a)
16603 if (V1.getOpcode() == ISD::SUB &&
16604 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16605 V1.getOperand(1) == V2) {
16606 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16607 }
16608 // (a, 0-a)
16609 if (V2.getOpcode() == ISD::SUB &&
16610 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16611 V2.getOperand(1) == V1) {
16612 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16613 }
16614 // (x-y, y-x)
16615 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16616 V1.getOperand(0) == V2.getOperand(1) &&
16617 V1.getOperand(1) == V2.getOperand(0)) {
16618 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16619 }
16620 }
16621 }
16622 }
16623
16624 break;
16625 case ISD::INTRINSIC_W_CHAIN:
16626 switch (N->getConstantOperandVal(1)) {
16627 default:
16628 break;
16629 case Intrinsic::ppc_altivec_vsum4sbs:
16630 case Intrinsic::ppc_altivec_vsum4shs:
16631 case Intrinsic::ppc_altivec_vsum4ubs: {
16632 // These sum-across intrinsics only have a chain due to the side effect
16633 // that they may set the SAT bit. If we know the SAT bit will not be set
16634 // for some inputs, we can replace any uses of their chain with the
16635 // input chain.
16636 if (BuildVectorSDNode *BVN =
16637 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16638 APInt APSplatBits, APSplatUndef;
16639 unsigned SplatBitSize;
16640 bool HasAnyUndefs;
16641 bool BVNIsConstantSplat = BVN->isConstantSplat(
16642 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16643 !Subtarget.isLittleEndian());
16644 // If the constant splat vector is 0, the SAT bit will not be set.
16645 if (BVNIsConstantSplat && APSplatBits == 0)
16646 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16647 }
16648 return SDValue();
16649 }
16650 case Intrinsic::ppc_vsx_lxvw4x:
16651 case Intrinsic::ppc_vsx_lxvd2x:
16652 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16653 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16654 if (Subtarget.needsSwapsForVSXMemOps())
16655 return expandVSXLoadForLE(N, DCI);
16656 break;
16657 }
16658 break;
16659 case ISD::INTRINSIC_VOID:
16660 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16661 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16662 if (Subtarget.needsSwapsForVSXMemOps()) {
16663 switch (N->getConstantOperandVal(1)) {
16664 default:
16665 break;
16666 case Intrinsic::ppc_vsx_stxvw4x:
16667 case Intrinsic::ppc_vsx_stxvd2x:
16668 return expandVSXStoreForLE(N, DCI);
16669 }
16670 }
16671 break;
16672 case ISD::BSWAP: {
16673 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16674 // For subtargets without LDBRX, we can still do better than the default
16675 // expansion even for 64-bit BSWAP (LOAD).
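// For example, (i32 (bswap (load %ptr))) becomes a PPCISD::LBRX node that
// selects to lwbrx, loading the value already byte-reversed.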
16676 bool Is64BitBswapOn64BitTgt =
16677 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16678 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16679 N->getOperand(0).hasOneUse();
16680 if (IsSingleUseNormalLd &&
16681 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16682 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16683 SDValue Load = N->getOperand(0);
16684 LoadSDNode *LD = cast<LoadSDNode>(Load);
16685 // Create the byte-swapping load.
16686 SDValue Ops[] = {
16687 LD->getChain(), // Chain
16688 LD->getBasePtr(), // Ptr
16689 DAG.getValueType(N->getValueType(0)) // VT
16690 };
16691 SDValue BSLoad =
16692 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16693 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16694 MVT::i64 : MVT::i32, MVT::Other),
16695 Ops, LD->getMemoryVT(), LD->getMemOperand());
16696
16697 // If this is an i16 load, insert the truncate.
16698 SDValue ResVal = BSLoad;
16699 if (N->getValueType(0) == MVT::i16)
16700 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16701
16702 // First, combine the bswap away. This makes the value produced by the
16703 // load dead.
16704 DCI.CombineTo(N, ResVal);
16705
16706 // Next, combine the load away, we give it a bogus result value but a real
16707 // chain result. The result value is dead because the bswap is dead.
16708 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16709
16710 // Return N so it doesn't get rechecked!
16711 return SDValue(N, 0);
16712 }
16713 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16714 // before legalization so that the BUILD_PAIR is handled correctly.
16715 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16716 !IsSingleUseNormalLd)
16717 return SDValue();
16718 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16719
16720 // Can't split volatile or atomic loads.
16721 if (!LD->isSimple())
16722 return SDValue();
16723 SDValue BasePtr = LD->getBasePtr();
16724 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16725 LD->getPointerInfo(), LD->getAlign());
16726 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16727 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16728 DAG.getIntPtrConstant(4, dl));
16729 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16730 LD->getMemOperand(), 4, 4);
16731 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16732 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16733 SDValue Res;
16734 if (Subtarget.isLittleEndian())
16735 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16736 else
16737 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16738 SDValue TF =
16739 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16740 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16741 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16742 return Res;
16743 }
16744 case PPCISD::VCMP:
16745 // If a VCMP_rec node already exists with exactly the same operands as this
16746 // node, use its result instead of this node (VCMP_rec computes both a CR6
16747 // and a normal output).
16748 //
16749 if (!N->getOperand(0).hasOneUse() &&
16750 !N->getOperand(1).hasOneUse() &&
16751 !N->getOperand(2).hasOneUse()) {
16752
16753 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16754 SDNode *VCMPrecNode = nullptr;
16755
16756 SDNode *LHSN = N->getOperand(0).getNode();
16757 for (SDNode *User : LHSN->users())
16758 if (User->getOpcode() == PPCISD::VCMP_rec &&
16759 User->getOperand(1) == N->getOperand(1) &&
16760 User->getOperand(2) == N->getOperand(2) &&
16761 User->getOperand(0) == N->getOperand(0)) {
16762 VCMPrecNode = User;
16763 break;
16764 }
16765
16766 // If there is no VCMP_rec node, or if the flag value has a single use,
16767 // don't transform this.
16768 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16769 break;
16770
16771 // Look at the (necessarily single) use of the flag value. If it has a
16772 // chain, this transformation is more complex. Note that multiple things
16773 // could use the value result, which we should ignore.
16774 SDNode *FlagUser = nullptr;
16775 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16776 FlagUser == nullptr; ++UI) {
16777 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16778 SDNode *User = UI->getUser();
16779 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16780 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16781 FlagUser = User;
16782 break;
16783 }
16784 }
16785 }
16786
16787 // If the user is a MFOCRF instruction, we know this is safe.
16788 // Otherwise we give up for right now.
16789 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16790 return SDValue(VCMPrecNode, 0);
16791 }
16792 break;
16793 case ISD::BR_CC: {
16794 // If this is a branch on an altivec predicate comparison, lower this so
16795 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
16796 // lowering is done pre-legalize, because the legalizer lowers the predicate
16797 // compare down to code that is difficult to reassemble.
16798 // This code also handles branches that depend on the result of a store
16799 // conditional.
16800 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16801 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16802
16803 int CompareOpc;
16804 bool isDot;
16805
16806 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16807 break;
16808
16809 // Since we are doing this pre-legalize, the RHS can be a constant of
16810 // arbitrary bitwidth which may cause issues when trying to get the value
16811 // from the underlying APInt.
16812 auto RHSAPInt = RHS->getAsAPIntVal();
16813 if (!RHSAPInt.isIntN(64))
16814 break;
16815
16816 unsigned Val = RHSAPInt.getZExtValue();
16817 auto isImpossibleCompare = [&]() {
16818 // If this is a comparison against something other than 0/1, then we know
16819 // that the condition is never/always true.
16820 if (Val != 0 && Val != 1) {
16821 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16822 return N->getOperand(0);
16823 // Always !=, turn it into an unconditional branch.
16824 return DAG.getNode(ISD::BR, dl, MVT::Other,
16825 N->getOperand(0), N->getOperand(4));
16826 }
16827 return SDValue();
16828 };
16829 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16830 unsigned StoreWidth = 0;
16831 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16832 isStoreConditional(LHS, StoreWidth)) {
16833 if (SDValue Impossible = isImpossibleCompare())
16834 return Impossible;
16835 PPC::Predicate CompOpc;
16836 // eq 0 => ne
16837 // ne 0 => eq
16838 // eq 1 => eq
16839 // ne 1 => ne
16840 if (Val == 0)
16841 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16842 else
16843 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16844
16845 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16846 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16847 auto *MemNode = cast<MemSDNode>(LHS);
16848 SDValue ConstSt = DAG.getMemIntrinsicNode(
16849 PPCISD::STORE_COND, dl,
16850 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16851 MemNode->getMemoryVT(), MemNode->getMemOperand());
16852
16853 SDValue InChain;
16854 // Unchain the branch from the original store conditional.
16855 if (N->getOperand(0) == LHS.getValue(1))
16856 InChain = LHS.getOperand(0);
16857 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16858 SmallVector<SDValue, 4> InChains;
16859 SDValue InTF = N->getOperand(0);
16860 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16861 if (InTF.getOperand(i) != LHS.getValue(1))
16862 InChains.push_back(InTF.getOperand(i));
16863 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16864 }
16865
16866 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16867 DAG.getConstant(CompOpc, dl, MVT::i32),
16868 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16869 ConstSt.getValue(2));
16870 }
16871
16872 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16873 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16874 assert(isDot && "Can't compare against a vector result!");
16875
16876 if (SDValue Impossible = isImpossibleCompare())
16877 return Impossible;
16878
16879 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16880 // Create the PPCISD altivec 'dot' comparison node.
16881 SDValue Ops[] = {
16882 LHS.getOperand(2), // LHS of compare
16883 LHS.getOperand(3), // RHS of compare
16884 DAG.getConstant(CompareOpc, dl, MVT::i32)
16885 };
16886 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16887 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16888
16889 // Unpack the result based on how the target uses it.
16890 PPC::Predicate CompOpc;
16891 switch (LHS.getConstantOperandVal(1)) {
16892 default: // Can't happen, don't crash on invalid number though.
16893 case 0: // Branch on the value of the EQ bit of CR6.
16894 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16895 break;
16896 case 1: // Branch on the inverted value of the EQ bit of CR6.
16897 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16898 break;
16899 case 2: // Branch on the value of the LT bit of CR6.
16900 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16901 break;
16902 case 3: // Branch on the inverted value of the LT bit of CR6.
16903 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16904 break;
16905 }
16906
16907 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16908 DAG.getConstant(CompOpc, dl, MVT::i32),
16909 DAG.getRegister(PPC::CR6, MVT::i32),
16910 N->getOperand(4), CompNode.getValue(1));
16911 }
16912 break;
16913 }
16914 case ISD::BUILD_VECTOR:
16915 return DAGCombineBuildVector(N, DCI);
16916 }
16917
16918 return SDValue();
16919}
16920
16921SDValue
16922 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16923 SelectionDAG &DAG,
16924 SmallVectorImpl<SDNode *> &Created) const {
16925 // fold (sdiv X, pow2)
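// For example, (sdiv i32 %x, 4) becomes (sra_addze %x, 2), and
// (sdiv i32 %x, -4) becomes (sub 0, (sra_addze %x, 2)).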
16926 EVT VT = N->getValueType(0);
16927 if (VT == MVT::i64 && !Subtarget.isPPC64())
16928 return SDValue();
16929 if ((VT != MVT::i32 && VT != MVT::i64) ||
16930 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16931 return SDValue();
16932
16933 SDLoc DL(N);
16934 SDValue N0 = N->getOperand(0);
16935
16936 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16937 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16938 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16939
16940 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16941 Created.push_back(Op.getNode());
16942
16943 if (IsNegPow2) {
16944 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16945 Created.push_back(Op.getNode());
16946 }
16947
16948 return Op;
16949}
16950
16951//===----------------------------------------------------------------------===//
16952// Inline Assembly Support
16953//===----------------------------------------------------------------------===//
16954
16955void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16956 KnownBits &Known,
16957 const APInt &DemandedElts,
16958 const SelectionDAG &DAG,
16959 unsigned Depth) const {
16960 Known.resetAll();
16961 switch (Op.getOpcode()) {
16962 default: break;
16963 case PPCISD::LBRX: {
16964 // lhbrx is known to have the top bits cleared out.
16965 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16966 Known.Zero = 0xFFFF0000;
16967 break;
16968 }
16969 case ISD::INTRINSIC_WO_CHAIN: {
16970 switch (Op.getConstantOperandVal(0)) {
16971 default: break;
16972 case Intrinsic::ppc_altivec_vcmpbfp_p:
16973 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16974 case Intrinsic::ppc_altivec_vcmpequb_p:
16975 case Intrinsic::ppc_altivec_vcmpequh_p:
16976 case Intrinsic::ppc_altivec_vcmpequw_p:
16977 case Intrinsic::ppc_altivec_vcmpequd_p:
16978 case Intrinsic::ppc_altivec_vcmpequq_p:
16979 case Intrinsic::ppc_altivec_vcmpgefp_p:
16980 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16981 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16982 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16983 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16984 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16985 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16986 case Intrinsic::ppc_altivec_vcmpgtub_p:
16987 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16988 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16989 case Intrinsic::ppc_altivec_vcmpgtud_p:
16990 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16991 Known.Zero = ~1U; // All bits but the low one are known to be zero.
16992 break;
16993 }
16994 break;
16995 }
16996 case ISD::INTRINSIC_W_CHAIN: {
16997 switch (Op.getConstantOperandVal(1)) {
16998 default:
16999 break;
17000 case Intrinsic::ppc_load2r:
17001 // Top bits are cleared for load2r (which is the same as lhbrx).
17002 Known.Zero = 0xFFFF0000;
17003 break;
17004 }
17005 break;
17006 }
17007 }
17008}
17009
17010Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
17011 switch (Subtarget.getCPUDirective()) {
17012 default: break;
17013 case PPC::DIR_970:
17014 case PPC::DIR_PWR4:
17015 case PPC::DIR_PWR5:
17016 case PPC::DIR_PWR5X:
17017 case PPC::DIR_PWR6:
17018 case PPC::DIR_PWR6X:
17019 case PPC::DIR_PWR7:
17020 case PPC::DIR_PWR8:
17021 case PPC::DIR_PWR9:
17022 case PPC::DIR_PWR10:
17023 case PPC::DIR_PWR11:
17024 case PPC::DIR_PWR_FUTURE: {
17025 if (!ML)
17026 break;
17027
17028 if (!DisableInnermostLoopAlign32) {
17029 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
17030 // so that we can decrease cache misses and branch-prediction misses.
17031 // Actual alignment of the loop will depend on the hotness check and other
17032 // logic in alignBlocks.
17033 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
17034 return Align(32);
17035 }
17036
17037 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
17038
17039 // For small loops (between 5 and 8 instructions), align to a 32-byte
17040 // boundary so that the entire loop fits in one instruction-cache line.
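  // For example, a 7-instruction (28-byte) loop body is given Align(32) below
  // so it fits in a single 32-byte block, while a 16-byte loop keeps the
  // default alignment.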
17041 uint64_t LoopSize = 0;
17042 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
17043 for (const MachineInstr &J : **I) {
17044 LoopSize += TII->getInstSizeInBytes(J);
17045 if (LoopSize > 32)
17046 break;
17047 }
17048
17049 if (LoopSize > 16 && LoopSize <= 32)
17050 return Align(32);
17051
17052 break;
17053 }
17054 }
17055
17056 return TargetLowering::getPrefLoopAlignment(ML);
17057}
17058
17059/// getConstraintType - Given a constraint, return the type of
17060/// constraint it is for this target.
17061PPCTargetLowering::ConstraintType
17062PPCTargetLowering::getConstraintType(StringRef Constraint) const {
17063 if (Constraint.size() == 1) {
17064 switch (Constraint[0]) {
17065 default: break;
17066 case 'b':
17067 case 'r':
17068 case 'f':
17069 case 'd':
17070 case 'v':
17071 case 'y':
17072 return C_RegisterClass;
17073 case 'Z':
17074 // FIXME: While Z does indicate a memory constraint, it specifically
17075 // indicates an r+r address (used in conjunction with the 'y' modifier
17076 // in the replacement string). Currently, we're forcing the base
17077 // register to be r0 in the asm printer (which is interpreted as zero)
17078 // and forming the complete address in the second register. This is
17079 // suboptimal.
17080 return C_Memory;
17081 }
17082 } else if (Constraint == "wc") { // individual CR bits.
17083 return C_RegisterClass;
17084 } else if (Constraint == "wa" || Constraint == "wd" ||
17085 Constraint == "wf" || Constraint == "ws" ||
17086 Constraint == "wi" || Constraint == "ww") {
17087 return C_RegisterClass; // VSX registers.
17088 }
17089 return TargetLowering::getConstraintType(Constraint);
17090}
17091
17092/// Examine constraint type and operand type and determine a weight value.
17093/// This object must already have been set up with the operand type
17094/// and the current alternative constraint selected.
17095TargetLowering::ConstraintWeight
17096PPCTargetLowering::getSingleConstraintMatchWeight(
17097 AsmOperandInfo &info, const char *constraint) const {
17098 ConstraintWeight weight = CW_Invalid;
17099 Value *CallOperandVal = info.CallOperandVal;
17100 // If we don't have a value, we can't do a match,
17101 // but allow it at the lowest weight.
17102 if (!CallOperandVal)
17103 return CW_Default;
17104 Type *type = CallOperandVal->getType();
17105
17106 // Look at the constraint type.
17107 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
17108 return CW_Register; // an individual CR bit.
17109 else if ((StringRef(constraint) == "wa" ||
17110 StringRef(constraint) == "wd" ||
17111 StringRef(constraint) == "wf") &&
17112 type->isVectorTy())
17113 return CW_Register;
17114 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
17115 return CW_Register; // only holds 64-bit integer data.
17116 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
17117 return CW_Register;
17118 else if (StringRef(constraint) == "ww" && type->isFloatTy())
17119 return CW_Register;
17120
17121 switch (*constraint) {
17122 default:
17123 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
17124 break;
17125 case 'b':
17126 if (type->isIntegerTy())
17127 weight = CW_Register;
17128 break;
17129 case 'f':
17130 if (type->isFloatTy())
17131 weight = CW_Register;
17132 break;
17133 case 'd':
17134 if (type->isDoubleTy())
17135 weight = CW_Register;
17136 break;
17137 case 'v':
17138 if (type->isVectorTy())
17139 weight = CW_Register;
17140 break;
17141 case 'y':
17142 weight = CW_Register;
17143 break;
17144 case 'Z':
17145 weight = CW_Memory;
17146 break;
17147 }
17148 return weight;
17149}
17150
17151std::pair<unsigned, const TargetRegisterClass *>
17152PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
17153 StringRef Constraint,
17154 MVT VT) const {
17155 if (Constraint.size() == 1) {
17156 // GCC RS6000 Constraint Letters
17157 switch (Constraint[0]) {
17158 case 'b': // R1-R31
17159 if (VT == MVT::i64 && Subtarget.isPPC64())
17160 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
17161 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
17162 case 'r': // R0-R31
17163 if (VT == MVT::i64 && Subtarget.isPPC64())
17164 return std::make_pair(0U, &PPC::G8RCRegClass);
17165 return std::make_pair(0U, &PPC::GPRCRegClass);
17166 // 'd' and 'f' constraints are both defined to be "the floating point
17167 // registers", where one is for 32-bit and the other for 64-bit. We don't
17168 // really care overly much here so just give them all the same reg classes.
17169 case 'd':
17170 case 'f':
17171 if (Subtarget.hasSPE()) {
17172 if (VT == MVT::f32 || VT == MVT::i32)
17173 return std::make_pair(0U, &PPC::GPRCRegClass);
17174 if (VT == MVT::f64 || VT == MVT::i64)
17175 return std::make_pair(0U, &PPC::SPERCRegClass);
17176 } else {
17177 if (VT == MVT::f32 || VT == MVT::i32)
17178 return std::make_pair(0U, &PPC::F4RCRegClass);
17179 if (VT == MVT::f64 || VT == MVT::i64)
17180 return std::make_pair(0U, &PPC::F8RCRegClass);
17181 }
17182 break;
17183 case 'v':
17184 if (Subtarget.hasAltivec() && VT.isVector())
17185 return std::make_pair(0U, &PPC::VRRCRegClass);
17186 else if (Subtarget.hasVSX())
17187 // Scalars in Altivec registers only make sense with VSX.
17188 return std::make_pair(0U, &PPC::VFRCRegClass);
17189 break;
17190 case 'y': // crrc
17191 return std::make_pair(0U, &PPC::CRRCRegClass);
17192 }
17193 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
17194 // An individual CR bit.
17195 return std::make_pair(0U, &PPC::CRBITRCRegClass);
17196 } else if ((Constraint == "wa" || Constraint == "wd" ||
17197 Constraint == "wf" || Constraint == "wi") &&
17198 Subtarget.hasVSX()) {
17199 // A VSX register for either a scalar (FP) or vector. There is no
17200 // support for single precision scalars on subtargets prior to Power8.
17201 if (VT.isVector())
17202 return std::make_pair(0U, &PPC::VSRCRegClass);
17203 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17204 return std::make_pair(0U, &PPC::VSSRCRegClass);
17205 return std::make_pair(0U, &PPC::VSFRCRegClass);
17206 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
17207 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17208 return std::make_pair(0U, &PPC::VSSRCRegClass);
17209 else
17210 return std::make_pair(0U, &PPC::VSFRCRegClass);
17211 } else if (Constraint == "lr") {
17212 if (VT == MVT::i64)
17213 return std::make_pair(0U, &PPC::LR8RCRegClass);
17214 else
17215 return std::make_pair(0U, &PPC::LRRCRegClass);
17216 }
17217
17218 // Handle special cases of physical registers that are not properly handled
17219 // by the base class.
17220 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
17221 // If we name a VSX register, we can't defer to the base class because it
17222 // will not recognize the correct register (their names will be VSL{0-31}
17223 // and V{0-31} so they won't match). So we match them here.
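    // For example, the constraint "{vs3}" maps to VSL3 below, while "{vs34}"
    // maps to the overlapping Altivec register V2.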
17224 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
17225 int VSNum = atoi(Constraint.data() + 3);
17226 assert(VSNum >= 0 && VSNum <= 63 &&
17227 "Attempted to access a vsr out of range");
17228 if (VSNum < 32)
17229 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
17230 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
17231 }
17232
17233 // For float registers, we can't defer to the base class as it will match
17234 // the SPILLTOVSRRC class.
17235 if (Constraint.size() > 3 && Constraint[1] == 'f') {
17236 int RegNum = atoi(Constraint.data() + 2);
17237 if (RegNum > 31 || RegNum < 0)
17238 report_fatal_error("Invalid floating point register number");
17239 if (VT == MVT::f32 || VT == MVT::i32)
17240 return Subtarget.hasSPE()
17241 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
17242 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
17243 if (VT == MVT::f64 || VT == MVT::i64)
17244 return Subtarget.hasSPE()
17245 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
17246 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
17247 }
17248 }
17249
17250 std::pair<unsigned, const TargetRegisterClass *> R =
17251 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
17252
17253 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
17254 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
17255 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
17256 // register.
17257 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
17258 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
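  // For example, requesting "{r3}" for an i64 value on a 64-bit subtarget is
  // upgraded here to X3.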
17259 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
17260 PPC::GPRCRegClass.contains(R.first))
17261 return std::make_pair(TRI->getMatchingSuperReg(R.first,
17262 PPC::sub_32, &PPC::G8RCRegClass),
17263 &PPC::G8RCRegClass);
17264
17265 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
17266 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
17267 R.first = PPC::CR0;
17268 R.second = &PPC::CRRCRegClass;
17269 }
17270 // FIXME: This warning should ideally be emitted in the front end.
17271 const auto &TM = getTargetMachine();
17272 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
17273 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
17274 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
17275 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
17276 errs() << "warning: vector registers 20 to 32 are reserved in the "
17277 "default AIX AltiVec ABI and cannot be used\n";
17278 }
17279
17280 return R;
17281}
17282
17283/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
17284/// vector. If it is invalid, don't add anything to Ops.
17285void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
17286 StringRef Constraint,
17287 std::vector<SDValue> &Ops,
17288 SelectionDAG &DAG) const {
17289 SDValue Result;
17290
17291 // Only support length 1 constraints.
17292 if (Constraint.size() > 1)
17293 return;
17294
17295 char Letter = Constraint[0];
17296 switch (Letter) {
17297 default: break;
17298 case 'I':
17299 case 'J':
17300 case 'K':
17301 case 'L':
17302 case 'M':
17303 case 'N':
17304 case 'O':
17305 case 'P': {
17306 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
17307 if (!CST) return; // Must be an immediate to match.
17308 SDLoc dl(Op);
17309 int64_t Value = CST->getSExtValue();
17310 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
17311 // numbers are printed as such.
17312 switch (Letter) {
17313 default: llvm_unreachable("Unknown constraint letter!");
17314 case 'I': // "I" is a signed 16-bit constant.
17315 if (isInt<16>(Value))
17316 Result = DAG.getTargetConstant(Value, dl, TCVT);
17317 break;
17318 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
17319 if (isShiftedUInt<16, 16>(Value))
17320 Result = DAG.getTargetConstant(Value, dl, TCVT);
17321 break;
17322 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
17323 if (isShiftedInt<16, 16>(Value))
17324 Result = DAG.getTargetConstant(Value, dl, TCVT);
17325 break;
17326 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
17327 if (isUInt<16>(Value))
17328 Result = DAG.getTargetConstant(Value, dl, TCVT);
17329 break;
17330 case 'M': // "M" is a constant that is greater than 31.
17331 if (Value > 31)
17332 Result = DAG.getTargetConstant(Value, dl, TCVT);
17333 break;
17334 case 'N': // "N" is a positive constant that is an exact power of two.
17335 if (Value > 0 && isPowerOf2_64(Value))
17336 Result = DAG.getTargetConstant(Value, dl, TCVT);
17337 break;
17338 case 'O': // "O" is the constant zero.
17339 if (Value == 0)
17340 Result = DAG.getTargetConstant(Value, dl, TCVT);
17341 break;
17342 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
17343 if (isInt<16>(-Value))
17344 Result = DAG.getTargetConstant(Value, dl, TCVT);
17345 break;
17346 }
17347 break;
17348 }
17349 }
17350
17351 if (Result.getNode()) {
17352 Ops.push_back(Result);
17353 return;
17354 }
17355
17356 // Handle standard constraint letters.
17357 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17358}
17359
17360void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17361 SmallVectorImpl<SDValue> &Ops,
17362 SelectionDAG &DAG) const {
17363 if (I.getNumOperands() <= 1)
17364 return;
17365 if (!isa<ConstantSDNode>(Ops[1].getNode()))
17366 return;
17367 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17368 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17369 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17370 return;
17371
17372 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
17373 Ops.push_back(DAG.getMDNode(MDN));
17374}
17375
17376// isLegalAddressingMode - Return true if the addressing mode represented
17377// by AM is legal for this target, for a load/store of the specified type.
17378bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17379 const AddrMode &AM, Type *Ty,
17380 unsigned AS,
17381 Instruction *I) const {
17382 // Vector-type r+i forms are supported as DQ form only since Power9. We don't
17383 // check the DQ-form offset requirement (off % 16 == 0) here because, on
17384 // PowerPC, the immediate form is preferred and the offset can be adjusted to
17385 // use it later in the PPCLoopInstrFormPrep pass. Also, LSR checks the legal
17386 // addressing mode of an LSRUse using only its min and max offsets, so we
17387 // should be a little aggressive and accept the other offsets of that LSRUse.
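  // For example, on a pre-Power9 subtarget a v4i32 access of the form
  // 'base + 16' is rejected here and is instead formed as an X-Form
  // (register + register) access.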
17388 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17389 return false;
17390
17391 // PPC allows a sign-extended 16-bit immediate field.
17392 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17393 return false;
17394
17395 // No global is ever allowed as a base.
17396 if (AM.BaseGV)
17397 return false;
17398
17399 // PPC only supports r+r,
17400 switch (AM.Scale) {
17401 case 0: // "r+i" or just "i", depending on HasBaseReg.
17402 break;
17403 case 1:
17404 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
17405 return false;
17406 // Otherwise we have r+r or r+i.
17407 break;
17408 case 2:
17409 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
17410 return false;
17411 // Allow 2*r as r+r.
17412 break;
17413 default:
17414 // No other scales are supported.
17415 return false;
17416 }
17417
17418 return true;
17419}
17420
17421SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17422 SelectionDAG &DAG) const {
17423 MachineFunction &MF = DAG.getMachineFunction();
17424 MachineFrameInfo &MFI = MF.getFrameInfo();
17425 MFI.setReturnAddressIsTaken(true);
17426
17427 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
17428 return SDValue();
17429
17430 SDLoc dl(Op);
17431 unsigned Depth = Op.getConstantOperandVal(0);
17432
17433 // Make sure the function does not optimize away the store of the RA to
17434 // the stack.
17435 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17436 FuncInfo->setLRStoreRequired();
17437 auto PtrVT = getPointerTy(MF.getDataLayout());
17438
17439 if (Depth > 0) {
17440 // The link register (return address) is saved in the caller's frame
17441 // not the callee's stack frame. So we must get the caller's frame
17442 // address and load the return address at the LR offset from there.
17443 SDValue FrameAddr =
17444 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17445 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
17446 SDValue Offset =
17447 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
17448 Subtarget.getScalarIntVT());
17449 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
17450 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
17452 }
17453
17454 // Just load the return address off the stack.
17455 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17456 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17458}
17459
17460SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17461 SelectionDAG &DAG) const {
17462 SDLoc dl(Op);
17463 unsigned Depth = Op.getConstantOperandVal(0);
17464
17465 MachineFunction &MF = DAG.getMachineFunction();
17466 MachineFrameInfo &MFI = MF.getFrameInfo();
17467 MFI.setFrameAddressIsTaken(true);
17468
17469 EVT PtrVT = getPointerTy(MF.getDataLayout());
17470 bool isPPC64 = PtrVT == MVT::i64;
17471
17472 // Naked functions never have a frame pointer, and so we use r1. For all
17473 // other functions, this decision must be delayed until during PEI.
17474 unsigned FrameReg;
17475 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17476 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17477 else
17478 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17479
17480 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17481 PtrVT);
17482 while (Depth--)
17483 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17484 FrameAddr, MachinePointerInfo());
17485 return FrameAddr;
17486}
17487
17488#define GET_REGISTER_MATCHER
17489#include "PPCGenAsmMatcher.inc"
17490
17491Register PPCTargetLowering::getRegisterByName(const char *RegName, LLT VT,
17492 const MachineFunction &MF) const {
17493 bool IsPPC64 = Subtarget.isPPC64();
17494
17495 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
17496 if (!Is64Bit && VT != LLT::scalar(32))
17497 report_fatal_error("Invalid register global variable type");
17498
17500 if (!Reg)
17502 Twine("Invalid global name register \"" + StringRef(RegName) + "\"."));
17503
17504 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
17505 // Need followup investigation as to why.
17506 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
17507 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
17508 StringRef(RegName) + "\"."));
17509
17510 // Convert GPR to GP8R register for 64bit.
17511 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
17512 Reg = Reg.id() - PPC::R0 + PPC::X0;
17513
17514 return Reg;
17515}
17516
17517bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
17518 // The 32-bit SVR4 ABI accesses everything as got-indirect.
17519 if (Subtarget.is32BitELFABI())
17520 return true;
17521
17522 // AIX accesses everything indirectly through the TOC, which is similar to
17523 // the GOT.
17524 if (Subtarget.isAIXABI())
17525 return true;
17526
17527 CodeModel::Model CModel = getTargetMachine().getCodeModel();
17528 // Under the small or large code model, module locals are accessed
17529 // indirectly by loading their address from the .toc/.got.
17530 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17531 return true;
17532
17533 // JumpTable and BlockAddress are accessed as got-indirect.
17534 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
17535 return true;
17536
17537 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
17538 return Subtarget.isGVIndirectSymbol(G->getGlobal());
17539
17540 return false;
17541}
17542
17543bool
17544PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
17545 // The PowerPC target isn't yet aware of offsets.
17546 return false;
17547}
17548
17549bool PPCTargetLowering::getTgtMemIntrinsicInfo(IntrinsicInfo &Info,
17550 const CallInst &I,
17551 MachineFunction &MF,
17552 unsigned Intrinsic) const {
17553 switch (Intrinsic) {
17554 case Intrinsic::ppc_atomicrmw_xchg_i128:
17555 case Intrinsic::ppc_atomicrmw_add_i128:
17556 case Intrinsic::ppc_atomicrmw_sub_i128:
17557 case Intrinsic::ppc_atomicrmw_nand_i128:
17558 case Intrinsic::ppc_atomicrmw_and_i128:
17559 case Intrinsic::ppc_atomicrmw_or_i128:
17560 case Intrinsic::ppc_atomicrmw_xor_i128:
17561 case Intrinsic::ppc_cmpxchg_i128:
17563 Info.memVT = MVT::i128;
17564 Info.ptrVal = I.getArgOperand(0);
17565 Info.offset = 0;
17566 Info.align = Align(16);
17569 return true;
17570 case Intrinsic::ppc_atomic_load_i128:
17572 Info.memVT = MVT::i128;
17573 Info.ptrVal = I.getArgOperand(0);
17574 Info.offset = 0;
17575 Info.align = Align(16);
17577 return true;
17578 case Intrinsic::ppc_atomic_store_i128:
17580 Info.memVT = MVT::i128;
17581 Info.ptrVal = I.getArgOperand(2);
17582 Info.offset = 0;
17583 Info.align = Align(16);
17585 return true;
17586 case Intrinsic::ppc_altivec_lvx:
17587 case Intrinsic::ppc_altivec_lvxl:
17588 case Intrinsic::ppc_altivec_lvebx:
17589 case Intrinsic::ppc_altivec_lvehx:
17590 case Intrinsic::ppc_altivec_lvewx:
17591 case Intrinsic::ppc_vsx_lxvd2x:
17592 case Intrinsic::ppc_vsx_lxvw4x:
17593 case Intrinsic::ppc_vsx_lxvd2x_be:
17594 case Intrinsic::ppc_vsx_lxvw4x_be:
17595 case Intrinsic::ppc_vsx_lxvl:
17596 case Intrinsic::ppc_vsx_lxvll: {
17597 EVT VT;
17598 switch (Intrinsic) {
17599 case Intrinsic::ppc_altivec_lvebx:
17600 VT = MVT::i8;
17601 break;
17602 case Intrinsic::ppc_altivec_lvehx:
17603 VT = MVT::i16;
17604 break;
17605 case Intrinsic::ppc_altivec_lvewx:
17606 VT = MVT::i32;
17607 break;
17608 case Intrinsic::ppc_vsx_lxvd2x:
17609 case Intrinsic::ppc_vsx_lxvd2x_be:
17610 VT = MVT::v2f64;
17611 break;
17612 default:
17613 VT = MVT::v4i32;
17614 break;
17615 }
17616
17618 Info.memVT = VT;
17619 Info.ptrVal = I.getArgOperand(0);
17620 Info.offset = -VT.getStoreSize()+1;
17621 Info.size = 2*VT.getStoreSize()-1;
17622 Info.align = Align(1);
17624 return true;
17625 }
17626 case Intrinsic::ppc_altivec_stvx:
17627 case Intrinsic::ppc_altivec_stvxl:
17628 case Intrinsic::ppc_altivec_stvebx:
17629 case Intrinsic::ppc_altivec_stvehx:
17630 case Intrinsic::ppc_altivec_stvewx:
17631 case Intrinsic::ppc_vsx_stxvd2x:
17632 case Intrinsic::ppc_vsx_stxvw4x:
17633 case Intrinsic::ppc_vsx_stxvd2x_be:
17634 case Intrinsic::ppc_vsx_stxvw4x_be:
17635 case Intrinsic::ppc_vsx_stxvl:
17636 case Intrinsic::ppc_vsx_stxvll: {
17637 EVT VT;
17638 switch (Intrinsic) {
17639 case Intrinsic::ppc_altivec_stvebx:
17640 VT = MVT::i8;
17641 break;
17642 case Intrinsic::ppc_altivec_stvehx:
17643 VT = MVT::i16;
17644 break;
17645 case Intrinsic::ppc_altivec_stvewx:
17646 VT = MVT::i32;
17647 break;
17648 case Intrinsic::ppc_vsx_stxvd2x:
17649 case Intrinsic::ppc_vsx_stxvd2x_be:
17650 VT = MVT::v2f64;
17651 break;
17652 default:
17653 VT = MVT::v4i32;
17654 break;
17655 }
17656
17658 Info.memVT = VT;
17659 Info.ptrVal = I.getArgOperand(1);
17660 Info.offset = -VT.getStoreSize()+1;
17661 Info.size = 2*VT.getStoreSize()-1;
17662 Info.align = Align(1);
17664 return true;
17665 }
17666 case Intrinsic::ppc_stdcx:
17667 case Intrinsic::ppc_stwcx:
17668 case Intrinsic::ppc_sthcx:
17669 case Intrinsic::ppc_stbcx: {
17670 EVT VT;
17671 auto Alignment = Align(8);
17672 switch (Intrinsic) {
17673 case Intrinsic::ppc_stdcx:
17674 VT = MVT::i64;
17675 break;
17676 case Intrinsic::ppc_stwcx:
17677 VT = MVT::i32;
17678 Alignment = Align(4);
17679 break;
17680 case Intrinsic::ppc_sthcx:
17681 VT = MVT::i16;
17682 Alignment = Align(2);
17683 break;
17684 case Intrinsic::ppc_stbcx:
17685 VT = MVT::i8;
17686 Alignment = Align(1);
17687 break;
17688 }
17690 Info.memVT = VT;
17691 Info.ptrVal = I.getArgOperand(0);
17692 Info.offset = 0;
17693 Info.align = Alignment;
17695 return true;
17696 }
17697 default:
17698 break;
17699 }
17700
17701 return false;
17702}
17703
17704/// It returns EVT::Other if the type should be determined using generic
17705/// target-independent logic.
17706EVT PPCTargetLowering::getOptimalMemOpType(
17707 const MemOp &Op, const AttributeList &FuncAttributes) const {
17708 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17709 // We should use Altivec/VSX loads and stores when available. For unaligned
17710 // addresses, unaligned VSX loads are only fast starting with the P8.
17711 if (Subtarget.hasAltivec() && Op.size() >= 16) {
17712 if (Op.isMemset() && Subtarget.hasVSX()) {
17713 uint64_t TailSize = Op.size() % 16;
17714 // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
17715 // element if the vector element type matches the tail store. For a tail
17716 // size of 3 or 4 the tail store is an i32, so v4i32 cannot be used and a different legal type is needed.
17717 if (TailSize > 2 && TailSize <= 4) {
17718 return MVT::v8i16;
17719 }
17720 return MVT::v4i32;
17721 }
17722 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17723 return MVT::v4i32;
17724 }
17725 }
17726
17727 if (Subtarget.isPPC64()) {
17728 return MVT::i64;
17729 }
17730
17731 return MVT::i32;
17732}
17733
17734/// Returns true if it is beneficial to convert a load of a constant
17735/// to just the constant itself.
17736bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17737 Type *Ty) const {
17738 assert(Ty->isIntegerTy());
17739
17740 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17741 return !(BitSize == 0 || BitSize > 64);
17742}
17743
17745 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17746 return false;
17747 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17748 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17749 return NumBits1 == 64 && NumBits2 == 32;
17750}
17751
17753 if (!VT1.isInteger() || !VT2.isInteger())
17754 return false;
17755 unsigned NumBits1 = VT1.getSizeInBits();
17756 unsigned NumBits2 = VT2.getSizeInBits();
17757 return NumBits1 == 64 && NumBits2 == 32;
17758}
17759
17761 // Generally speaking, zexts are not free, but they are free when they can be
17762 // folded with other operations.
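  // For example, an i8 or i16 zero-extending load is already performed by
  // lbz/lhz, so a zext of the loaded value costs nothing extra.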
17763 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17764 EVT MemVT = LD->getMemoryVT();
17765 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17766 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17767 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17768 LD->getExtensionType() == ISD::ZEXTLOAD))
17769 return true;
17770 }
17771
17772 // FIXME: Add other cases...
17773 // - 32-bit shifts with a zext to i64
17774 // - zext after ctlz, bswap, etc.
17775 // - zext after and by a constant mask
17776
17777 return TargetLowering::isZExtFree(Val, VT2);
17778}
17779
17780bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17781 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17782 "invalid fpext types");
17783 // Extending to float128 is not free.
17784 if (DestVT == MVT::f128)
17785 return false;
17786 return true;
17787}
17788
17790 return isInt<16>(Imm) || isUInt<16>(Imm);
17791}
17792
17794 return isInt<16>(Imm) || isUInt<16>(Imm);
17795}
17796
17799 unsigned *Fast) const {
17801 return false;
17802
17803 // PowerPC supports unaligned memory access for simple non-vector types.
17804 // Although accessing unaligned addresses is not as efficient as accessing
17805 // aligned addresses, it is generally more efficient than manual expansion,
17806 // and generally only traps for software emulation when crossing page
17807 // boundaries.
17808
17809 if (!VT.isSimple())
17810 return false;
17811
17812 if (VT.isFloatingPoint() && !VT.isVector() &&
17813 !Subtarget.allowsUnalignedFPAccess())
17814 return false;
17815
17816 if (VT.getSimpleVT().isVector()) {
17817 if (Subtarget.hasVSX()) {
17818 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17819 VT != MVT::v4f32 && VT != MVT::v4i32)
17820 return false;
17821 } else {
17822 return false;
17823 }
17824 }
17825
17826 if (VT == MVT::ppcf128)
17827 return false;
17828
17829 if (Fast)
17830 *Fast = 1;
17831
17832 return true;
17833}
17834
17836 SDValue C) const {
17837 // Check integral scalar types.
17838 if (!VT.isScalarInteger())
17839 return false;
17840 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17841 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17842 return false;
17843 // This transformation will generate >= 2 operations. But the following
17844 // cases will generate <= 2 instructions during ISEL, so exclude them:
17845 // 1. If the constant multiplier fits in 16 bits, it can be handled by one
17846 // HW instruction, i.e. MULLI.
17847 // 2. If the multiplier fits in 16 bits after shifting out its trailing zeros,
17848 // only one extra shift is needed compared to case 1, i.e. MULLI and RLDICR.
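    // For example, multiplying by 65537 (0x10001) is not excluded by the cases
    // above, and since 65537 - 1 is a power of two the multiply can be done as
    // a shift plus an add, so we return true for it below.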
17849 int64_t Imm = ConstNode->getSExtValue();
17850 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17851 Imm >>= Shift;
17852 if (isInt<16>(Imm))
17853 return false;
17854 uint64_t UImm = static_cast<uint64_t>(Imm);
17855 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17856 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17857 return true;
17858 }
17859 return false;
17860}
17861
17863 EVT VT) const {
17866}
17867
17869 Type *Ty) const {
17870 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17871 return false;
17872 switch (Ty->getScalarType()->getTypeID()) {
17873 case Type::FloatTyID:
17874 case Type::DoubleTyID:
17875 return true;
17876 case Type::FP128TyID:
17877 return Subtarget.hasP9Vector();
17878 default:
17879 return false;
17880 }
17881}
17882
17883// FIXME: add more patterns which are not profitable to hoist.
17885 if (!I->hasOneUse())
17886 return true;
17887
17888 Instruction *User = I->user_back();
17889 assert(User && "A single use instruction with no uses.");
17890
17891 switch (I->getOpcode()) {
17892 case Instruction::FMul: {
17893 // Don't break FMA, PowerPC prefers FMA.
17894 if (User->getOpcode() != Instruction::FSub &&
17895 User->getOpcode() != Instruction::FAdd)
17896 return true;
17897
17899 const Function *F = I->getFunction();
17900 const DataLayout &DL = F->getDataLayout();
17901 Type *Ty = User->getOperand(0)->getType();
17902
17903 return !(
17906 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17907 }
17908 case Instruction::Load: {
17909 // Don't break "store (load float*)" pattern, this pattern will be combined
17910 // to "store (load int32)" by a later InstCombine pass. See function
17911 // combineLoadToOperationType. On PowerPC, loading a floating-point value
17912 // takes more cycles than loading a 32-bit integer.
17913 LoadInst *LI = cast<LoadInst>(I);
17914 // For the loads that combineLoadToOperationType does nothing, like
17915 // ordered load, it should be profitable to hoist them.
17916 // For swifterror load, it can only be used for pointer to pointer type, so
17917 // later type check should get rid of this case.
17918 if (!LI->isUnordered())
17919 return true;
17920
17921 if (User->getOpcode() != Instruction::Store)
17922 return true;
17923
17924 if (I->getType()->getTypeID() != Type::FloatTyID)
17925 return true;
17926
17927 return false;
17928 }
17929 default:
17930 return true;
17931 }
17932 return true;
17933}
17934
17935const MCPhysReg *
17937 // LR is a callee-save register, but we must treat it as clobbered by any call
17938 // site. Hence we include LR in the scratch registers, which are in turn added
17939 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17940 // to CTR, which is used by any indirect call.
17941 static const MCPhysReg ScratchRegs[] = {
17942 PPC::X12, PPC::LR8, PPC::CTR8, 0
17943 };
17944
17945 return ScratchRegs;
17946}
17947
17949 const Constant *PersonalityFn) const {
17950 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17951}
17952
17954 const Constant *PersonalityFn) const {
17955 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17956}
17957
17958bool
17960 EVT VT , unsigned DefinedValues) const {
17961 if (VT == MVT::v2i64)
17962 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17963
17964 if (Subtarget.hasVSX())
17965 return true;
17966
17968}
17969
17971 if (DisableILPPref || Subtarget.enableMachineScheduler())
17973
17974 return Sched::ILP;
17975}
17976
17977// Create a fast isel object.
17978FastISel *
17980 const TargetLibraryInfo *LibInfo) const {
17981 return PPC::createFastISel(FuncInfo, LibInfo);
17982}
17983
17984// 'Inverted' means the FMA opcode after negating one multiplicand.
17985// For example, (fma -a b c) = (fnmsub a b c)
17986static unsigned invertFMAOpcode(unsigned Opc) {
17987 switch (Opc) {
17988 default:
17989 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17990 case ISD::FMA:
17991 return PPCISD::FNMSUB;
17992 case PPCISD::FNMSUB:
17993 return ISD::FMA;
17994 }
17995}
17996
17997SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17998 bool LegalOps, bool OptForSize,
17999 NegatibleCost &Cost,
18000 unsigned Depth) const {
18002 return SDValue();
18003
18004 unsigned Opc = Op.getOpcode();
18005 EVT VT = Op.getValueType();
18006 SDNodeFlags Flags = Op.getNode()->getFlags();
18007
18008 switch (Opc) {
18009 case PPCISD::FNMSUB:
18010 if (!Op.hasOneUse() || !isTypeLegal(VT))
18011 break;
18012
18014 SDValue N0 = Op.getOperand(0);
18015 SDValue N1 = Op.getOperand(1);
18016 SDValue N2 = Op.getOperand(2);
18017 SDLoc Loc(Op);
18018
18020 SDValue NegN2 =
18021 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
18022
18023 if (!NegN2)
18024 return SDValue();
18025
18026 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
18027 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
18028 // These transformations may change sign of zeroes. For example,
18029 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
18030 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
18031 // Try and choose the cheaper one to negate.
18033 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
18034 N0Cost, Depth + 1);
18035
18037 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
18038 N1Cost, Depth + 1);
18039
18040 if (NegN0 && N0Cost <= N1Cost) {
18041 Cost = std::min(N0Cost, N2Cost);
18042 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
18043 } else if (NegN1) {
18044 Cost = std::min(N1Cost, N2Cost);
18045 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
18046 }
18047 }
18048
18049 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
18050 if (isOperationLegal(ISD::FMA, VT)) {
18051 Cost = N2Cost;
18052 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
18053 }
18054
18055 break;
18056 }
18057
18058 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
18059 Cost, Depth);
18060}
18061
18062// Override to enable LOAD_STACK_GUARD lowering on Linux.
18064 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
18065 return true;
18067}
18068
18069// Override to disable global variable loading on Linux and insert AIX canary
18070// word declaration.
18072 if (Subtarget.isAIXABI()) {
18073 M.getOrInsertGlobal(AIXSSPCanaryWordName,
18074 PointerType::getUnqual(M.getContext()));
18075 return;
18076 }
18077 if (!Subtarget.isTargetLinux())
18079}
18080
18082 if (Subtarget.isAIXABI())
18083 return M.getGlobalVariable(AIXSSPCanaryWordName);
18085}
18086
18088 bool ForCodeSize) const {
18089 if (!VT.isSimple() || !Subtarget.hasVSX())
18090 return false;
18091
18092 switch(VT.getSimpleVT().SimpleTy) {
18093 default:
18094 // For FP types that are currently not supported by PPC backend, return
18095 // false. Examples: f16, f80.
18096 return false;
18097 case MVT::f32:
18098 case MVT::f64: {
18099 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
18100 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
18101 return true;
18102 }
18103 bool IsExact;
18104 APSInt IntResult(16, false);
18105 // The rounding mode doesn't really matter because we only care about floats
18106 // that can be converted to integers exactly.
18107 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
18108 // For exact values in the range [-16, 15] we can materialize the float.
18109 if (IsExact && IntResult <= 15 && IntResult >= -16)
18110 return true;
18111 return Imm.isZero();
18112 }
18113 case MVT::ppcf128:
18114 return Imm.isPosZero();
18115 }
18116}
18117
18118// For vector shift operation op, fold
18119// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
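// For example, (shl v4i32:x, (and v4i32:y, splat(31))) becomes
// (PPCISD::SHL x, y): the vector shift instructions only use the low
// log2(element size) bits of each shift amount, so the masking is redundant.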
18120static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
18121 SelectionDAG &DAG) {
18122 SDValue N0 = N->getOperand(0);
18123 SDValue N1 = N->getOperand(1);
18124 EVT VT = N0.getValueType();
18125 unsigned OpSizeInBits = VT.getScalarSizeInBits();
18126 unsigned Opcode = N->getOpcode();
18127 unsigned TargetOpcode;
18128
18129 switch (Opcode) {
18130 default:
18131 llvm_unreachable("Unexpected shift operation");
18132 case ISD::SHL:
18133 TargetOpcode = PPCISD::SHL;
18134 break;
18135 case ISD::SRL:
18136 TargetOpcode = PPCISD::SRL;
18137 break;
18138 case ISD::SRA:
18139 TargetOpcode = PPCISD::SRA;
18140 break;
18141 }
18142
18143 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
18144 N1->getOpcode() == ISD::AND)
18145 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
18146 if (Mask->getZExtValue() == OpSizeInBits - 1)
18147 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
18148
18149 return SDValue();
18150}
18151
18152SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
18153 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18154 return Value;
18155
18156 SDValue N0 = N->getOperand(0);
18157 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18158 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
18159 N0.getOpcode() != ISD::SIGN_EXTEND ||
18160 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
18161 N->getValueType(0) != MVT::i64)
18162 return SDValue();
18163
18164 // We can't save an operation here if the value is already extended, and
18165 // the existing shift is easier to combine.
18166 SDValue ExtsSrc = N0.getOperand(0);
18167 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
18168 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
18169 return SDValue();
18170
18171 SDLoc DL(N0);
18172 SDValue ShiftBy = SDValue(CN1, 0);
18173 // We want the shift amount to be i32 on the extswli, but the shift could
18174 // have an i64.
18175 if (ShiftBy.getValueType() == MVT::i64)
18176 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
18177
18178 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
18179 ShiftBy);
18180}
18181
18182SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
18183 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18184 return Value;
18185
18186 return SDValue();
18187}
18188
18189SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
18190 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18191 return Value;
18192
18193 return SDValue();
18194}
18195
18196// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
18197// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
18198// When C is zero, the equation (addi Z, -C) can be simplified to Z
18199// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
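// For example, with C = 5, (add X, (zext (setne Z, 5))) becomes
//   addi t, Z, -5 ; addic t, t, -1 ; addze X
// where the addic produces a carry exactly when Z - 5 is nonzero, and addze
// then adds that carry to X.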
18200static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
18201 const PPCSubtarget &Subtarget) {
18202 if (!Subtarget.isPPC64())
18203 return SDValue();
18204
18205 SDValue LHS = N->getOperand(0);
18206 SDValue RHS = N->getOperand(1);
18207
18208 auto isZextOfCompareWithConstant = [](SDValue Op) {
18209 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
18210 Op.getValueType() != MVT::i64)
18211 return false;
18212
18213 SDValue Cmp = Op.getOperand(0);
18214 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
18215 Cmp.getOperand(0).getValueType() != MVT::i64)
18216 return false;
18217
18218 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
18219 int64_t NegConstant = 0 - Constant->getSExtValue();
18220 // Due to the limitations of the addi instruction,
18221 // -C is required to be in [-32768, 32767].
18222 return isInt<16>(NegConstant);
18223 }
18224
18225 return false;
18226 };
18227
18228 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
18229 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
18230
18231 // If there is a pattern, canonicalize a zext operand to the RHS.
18232 if (LHSHasPattern && !RHSHasPattern)
18233 std::swap(LHS, RHS);
18234 else if (!LHSHasPattern && !RHSHasPattern)
18235 return SDValue();
18236
18237 SDLoc DL(N);
18238 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
18239 SDValue Cmp = RHS.getOperand(0);
18240 SDValue Z = Cmp.getOperand(0);
18241 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
18242 int64_t NegConstant = 0 - Constant->getSExtValue();
18243
18244 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
18245 default: break;
18246 case ISD::SETNE: {
18247 // when C == 0
18248 // --> addze X, (addic Z, -1).carry
18249 // /
18250 // add X, (zext(setne Z, C))--
18251 // \ when -32768 <= -C <= 32767 && C != 0
18252 // --> addze X, (addic (addi Z, -C), -1).carry
18253 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18254 DAG.getConstant(NegConstant, DL, MVT::i64));
18255 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18256 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
18257 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
18258 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
18259 SDValue(Addc.getNode(), 1));
18260 }
18261 case ISD::SETEQ: {
18262 // when C == 0
18263 // --> addze X, (subfic Z, 0).carry
18264 // /
18265 // add X, (zext(sete Z, C))--
18266 // \ when -32768 <= -C <= 32767 && C != 0
18267 // --> addze X, (subfic (addi Z, -C), 0).carry
18268 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18269 DAG.getConstant(NegConstant, DL, MVT::i64));
18270 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18271 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
18272 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
18273 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
18274 SDValue(Subc.getNode(), 1));
18275 }
18276 }
18277
18278 return SDValue();
18279}
18280
18281// Transform
18282// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
18283// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
18284// In this case both C1 and C2 must be known constants.
18285// C1+C2 must fit into a 34 bit signed integer.
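// For example, (add (MAT_PCREL_ADDR foo+8), 16) is folded below into
// (MAT_PCREL_ADDR foo+24), avoiding a separate add instruction.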
18286static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
18287 const PPCSubtarget &Subtarget) {
18288 if (!Subtarget.isUsingPCRelativeCalls())
18289 return SDValue();
18290
18291 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
18292 // If we find that node try to cast the Global Address and the Constant.
18293 SDValue LHS = N->getOperand(0);
18294 SDValue RHS = N->getOperand(1);
18295
18296 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18297 std::swap(LHS, RHS);
18298
18299 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18300 return SDValue();
18301
18302 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
18303 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
18304 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
18305
18306 // Check that both casts succeeded.
18307 if (!GSDN || !ConstNode)
18308 return SDValue();
18309
18310 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
18311 SDLoc DL(GSDN);
18312
18313 // The signed int offset needs to fit in 34 bits.
18314 if (!isInt<34>(NewOffset))
18315 return SDValue();
18316
18317 // The new global address is a copy of the old global address except
18318 // that it has the updated Offset.
18319 SDValue GA =
18320 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
18321 NewOffset, GSDN->getTargetFlags());
18322 SDValue MatPCRel =
18323 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
18324 return MatPCRel;
18325}
18326
18327SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
18328 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
18329 return Value;
18330
18331 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
18332 return Value;
18333
18334 return SDValue();
18335}
18336
18337// Detect TRUNCATE operations on bitcasts of float128 values.
18338// What we are looking for here is the situation where we extract a subset
18339// of bits from a 128-bit float.
18340// This can be of two forms:
18341// 1) BITCAST of f128 feeding TRUNCATE
18342// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
18343// The reason this is required is because we do not have a legal i128 type
18344// and so we want to prevent having to store the f128 and then reload part
18345// of it.
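// For example, (trunc (srl (bitcast f128 %x to i128), 64) to i64) becomes an
// EXTRACT_VECTOR_ELT of (bitcast %x to v2i64) that selects whichever element
// holds the high 64 bits for the current endianness.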
18346SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
18347 DAGCombinerInfo &DCI) const {
18348 // If we are using CRBits then try that first.
18349 if (Subtarget.useCRBits()) {
18350 // Check if CRBits did anything and return that if it did.
18351 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
18352 return CRTruncValue;
18353 }
18354
18355 SDLoc dl(N);
18356 SDValue Op0 = N->getOperand(0);
18357
18358 // Looking for a truncate of i128 to i64.
18359 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
18360 return SDValue();
18361
18362 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
18363
18364 // SRL feeding TRUNCATE.
18365 if (Op0.getOpcode() == ISD::SRL) {
18366 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
18367 // The right shift has to be by 64 bits.
18368 if (!ConstNode || ConstNode->getZExtValue() != 64)
18369 return SDValue();
18370
18371 // Switch the element number to extract.
18372 EltToExtract = EltToExtract ? 0 : 1;
18373 // Update Op0 past the SRL.
18374 Op0 = Op0.getOperand(0);
18375 }
18376
18377 // BITCAST feeding a TRUNCATE possibly via SRL.
18378 if (Op0.getOpcode() == ISD::BITCAST &&
18379 Op0.getValueType() == MVT::i128 &&
18380 Op0.getOperand(0).getValueType() == MVT::f128) {
18381 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
18382 return DCI.DAG.getNode(
18383 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
18384 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
18385 }
18386 return SDValue();
18387}
18388
18389SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18390 SelectionDAG &DAG = DCI.DAG;
18391
18392 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
18393 if (!ConstOpOrElement)
18394 return SDValue();
18395
18396 // An imul is usually smaller than the alternative sequence for legal type.
18398 isOperationLegal(ISD::MUL, N->getValueType(0)))
18399 return SDValue();
18400
18401 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18402 switch (this->Subtarget.getCPUDirective()) {
18403 default:
18404 // TODO: enhance the condition for subtarget before pwr8
18405 return false;
18406 case PPC::DIR_PWR8:
18407 // type mul add shl
18408 // scalar 4 1 1
18409 // vector 7 2 2
18410 return true;
18411 case PPC::DIR_PWR9:
18412 case PPC::DIR_PWR10:
18413 case PPC::DIR_PWR11:
18415 // type mul add shl
18416 // scalar 5 2 2
18417 // vector 7 2 2
18418
18419 // The cycle ratios of the related operations are shown in the table above.
18420 // Because mul costs 5 (scalar) / 7 (vector) cycles while add/sub/shl all
18421 // cost 2 for both scalar and vector types, the 2-instruction patterns
18422 // (add/sub + shl, 4 cycles total) are always profitable; but for the
18423 // 3-instruction pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x),
18424 // sub + add + shl cost 6 cycles, so we should only do it for vector types.
18425 return IsAddOne && IsNeg ? VT.isVector() : true;
18426 }
18427 };
18428
18429 EVT VT = N->getValueType(0);
18430 SDLoc DL(N);
18431
18432 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
18433 bool IsNeg = MulAmt.isNegative();
18434 APInt MulAmtAbs = MulAmt.abs();
18435
18436 if ((MulAmtAbs - 1).isPowerOf2()) {
18437 // (mul x, 2^N + 1) => (add (shl x, N), x)
18438 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
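    // For example, (mul x, 9) => (add (shl x, 3), x) and
    // (mul x, -9) => (sub 0, (add (shl x, 3), x)).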
18439
18440 if (!IsProfitable(IsNeg, true, VT))
18441 return SDValue();
18442
18443 SDValue Op0 = N->getOperand(0);
18444 SDValue Op1 =
18445 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18446 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
18447 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18448
18449 if (!IsNeg)
18450 return Res;
18451
18452 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18453 } else if ((MulAmtAbs + 1).isPowerOf2()) {
18454 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18455 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
18456
18457 if (!IsProfitable(IsNeg, false, VT))
18458 return SDValue();
18459
18460 SDValue Op0 = N->getOperand(0);
18461 SDValue Op1 =
18462 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18463 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18464
18465 if (!IsNeg)
18466 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18467 else
18468 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18469
18470 } else {
18471 return SDValue();
18472 }
18473}
18474
18475// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18476// in combiner since we need to check SD flags and other subtarget features.
18477SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18478 DAGCombinerInfo &DCI) const {
18479 SDValue N0 = N->getOperand(0);
18480 SDValue N1 = N->getOperand(1);
18481 SDValue N2 = N->getOperand(2);
18482 SDNodeFlags Flags = N->getFlags();
18483 EVT VT = N->getValueType(0);
18484 SelectionDAG &DAG = DCI.DAG;
18486 unsigned Opc = N->getOpcode();
18487 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18488 bool LegalOps = !DCI.isBeforeLegalizeOps();
18489 SDLoc Loc(N);
18490
18491 if (!isOperationLegal(ISD::FMA, VT))
18492 return SDValue();
18493
18494 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18495 // since (fnmsub a b c)=-0 while c-ab=+0.
18496 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18497 return SDValue();
18498
18499 // (fma (fneg a) b c) => (fnmsub a b c)
18500 // (fnmsub (fneg a) b c) => (fma a b c)
18501 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18502 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18503
18504 // (fma a (fneg b) c) => (fnmsub a b c)
18505 // (fnmsub a (fneg b) c) => (fma a b c)
18506 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18507 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18508
18509 return SDValue();
18510}
18511
18512bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18513 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
18514 if (!Subtarget.is64BitELFABI())
18515 return false;
18516
18517 // If not a tail call then no need to proceed.
18518 if (!CI->isTailCall())
18519 return false;
18520
18521 // If sibling calls have been disabled and tail-calls aren't guaranteed
18522 // there is no reason to duplicate.
18523 auto &TM = getTargetMachine();
18524 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18525 return false;
18526
18527 // Can't tail call a function called indirectly, or if it has variadic args.
18528 const Function *Callee = CI->getCalledFunction();
18529 if (!Callee || Callee->isVarArg())
18530 return false;
18531
18532 // Make sure the callee and caller calling conventions are eligible for tco.
18533 const Function *Caller = CI->getParent()->getParent();
18534 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18535 CI->getCallingConv()))
18536 return false;
18537
18538 // If the function is local then we have a good chance at tail-calling it
18539 return getTargetMachine().shouldAssumeDSOLocal(Callee);
18540}
18541
18542bool PPCTargetLowering::
18543isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18544 const Value *Mask = AndI.getOperand(1);
18545 // If the mask is suitable for andi. or andis. we should sink the and.
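  // For example, masks such as 0x0000FFFF (andi.) or 0xFFFF0000 (andis.) are
  // accepted below, while 0x00FF00FF is not, since it needs both halves.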
18546 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18547 // Can't handle constants wider than 64-bits.
18548 if (CI->getBitWidth() > 64)
18549 return false;
18550 int64_t ConstVal = CI->getZExtValue();
18551 return isUInt<16>(ConstVal) ||
18552 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18553 }
18554
18555 // For non-constant masks, we can always use the record-form and.
18556 return true;
18557}
18558
18559/// getAddrModeForFlags - Based on the set of address flags, select the most
18560/// suitable instruction format to match.
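/// For example, a Power9 vector access whose offset is a signed 16-bit
/// multiple of 16 sets MOF_RPlusSImm16Mult16 and is typically matched as
/// AM_DQForm (e.g. lxv/stxv); addresses that satisfy none of the D-Form flag
/// sets fall back to AM_XForm.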
18561PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18562 // This is not a node we should be handling here.
18563 if (Flags == PPC::MOF_None)
18564 return PPC::AM_None;
18565 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18566 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18567 if ((Flags & FlagSet) == FlagSet)
18568 return PPC::AM_DForm;
18569 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18570 if ((Flags & FlagSet) == FlagSet)
18571 return PPC::AM_DSForm;
18572 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18573 if ((Flags & FlagSet) == FlagSet)
18574 return PPC::AM_DQForm;
18575 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18576 if ((Flags & FlagSet) == FlagSet)
18577 return PPC::AM_PrefixDForm;
18578 // If no other forms are selected, return an X-Form as it is the most
18579 // general addressing mode.
18580 return PPC::AM_XForm;
18581}
18582
18583/// Set alignment flags based on whether or not the Frame Index is aligned.
18584/// Utilized when computing flags for address computation when selecting
18585/// load and store instructions.
18586static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18587 SelectionDAG &DAG) {
18588 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18589 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18590 if (!FI)
18591 return;
18592 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18593 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18594 // If this is (add $FI, $S16Imm), the alignment flags are already set
18595 // based on the immediate. We just need to clear the alignment flags
18596 // if the FI alignment is weaker.
18597 if ((FrameIndexAlign % 4) != 0)
18598 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18599 if ((FrameIndexAlign % 16) != 0)
18600 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18601 // If the address is a plain FrameIndex, set alignment flags based on
18602 // FI alignment.
18603 if (!IsAdd) {
18604 if ((FrameIndexAlign % 4) == 0)
18605 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18606 if ((FrameIndexAlign % 16) == 0)
18607 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18608 }
18609}
18610
18611/// Given a node, compute flags that are used for address computation when
18612/// selecting load and store instructions. The flags computed are stored in
18613/// FlagSet. This function takes into account whether the node is a constant,
18614/// an ADD, or an OR, and computes the address flags accordingly.
18615static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18616 SelectionDAG &DAG) {
18617 // Set the alignment flags for the node depending on if the node is
18618 // 4-byte or 16-byte aligned.
18619 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18620 if ((Imm & 0x3) == 0)
18621 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18622 if ((Imm & 0xf) == 0)
18623 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18624 };
18625
18626 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18627 // All 32-bit constants can be computed as LIS + Disp.
18628 const APInt &ConstImm = CN->getAPIntValue();
18629 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18630 FlagSet |= PPC::MOF_AddrIsSImm32;
18631 SetAlignFlagsForImm(ConstImm.getZExtValue());
18632 setAlignFlagsForFI(N, FlagSet, DAG);
18633 }
18634 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18635 FlagSet |= PPC::MOF_RPlusSImm34;
18636 else // Let constant materialization handle large constants.
18637 FlagSet |= PPC::MOF_NotAddNorCst;
18638 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18639 // This address can be represented as an addition of:
18640 // - Register + Imm16 (possibly a multiple of 4/16)
18641 // - Register + Imm34
18642 // - Register + PPCISD::Lo
18643 // - Register + Register
18644 // In any case, we won't have to match this as Base + Zero.
18645 SDValue RHS = N.getOperand(1);
18646 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18647 const APInt &ConstImm = CN->getAPIntValue();
18648 if (ConstImm.isSignedIntN(16)) {
18649 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18650 SetAlignFlagsForImm(ConstImm.getZExtValue());
18651 setAlignFlagsForFI(N, FlagSet, DAG);
18652 }
18653 if (ConstImm.isSignedIntN(34))
18654 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18655 else
18656 FlagSet |= PPC::MOF_RPlusR; // Register.
18657 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18658 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18659 else
18660 FlagSet |= PPC::MOF_RPlusR;
18661 } else { // The address computation is not a constant or an addition.
18662 setAlignFlagsForFI(N, FlagSet, DAG);
18663 FlagSet |= PPC::MOF_NotAddNorCst;
18664 }
18665}
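For reference, the SetAlignFlagsForImm lambda above relies on the usual power-of-two alignment test: a displacement is a multiple of 4 when its two low bits are clear and a multiple of 16 when its four low bits are clear. A small standalone sketch of that check (illustrative only, not part of the LLVM source):

#include <cassert>
#include <cstdint>

int main() {
  auto multipleOf4 = [](uint64_t Imm) { return (Imm & 0x3) == 0; };
  auto multipleOf16 = [](uint64_t Imm) { return (Imm & 0xf) == 0; };
  assert(multipleOf4(48) && multipleOf16(48));  // eligible for DS- and DQ-Forms
  assert(multipleOf4(20) && !multipleOf16(20)); // eligible for DS-Form only
  assert(!multipleOf4(6));                      // needs a D-Form or X-Form
  return 0;
}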
18666
18667static bool isPCRelNode(SDValue N) {
18668 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18669 isValidPCRelNode<ConstantPoolSDNode>(N) ||
18670 isValidPCRelNode<GlobalAddressSDNode>(N) ||
18671 isValidPCRelNode<JumpTableSDNode>(N) ||
18672 isValidPCRelNode<BlockAddressSDNode>(N));
18673}
18674
18675 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18676/// the address flags of the load/store instruction that is to be matched.
18677unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18678 SelectionDAG &DAG) const {
18679 unsigned FlagSet = PPC::MOF_None;
18680
18681 // Compute subtarget flags.
18682 if (!Subtarget.hasP9Vector())
18683 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18684 else
18685 FlagSet |= PPC::MOF_SubtargetP9;
18686
18687 if (Subtarget.hasPrefixInstrs())
18688 FlagSet |= PPC::MOF_SubtargetP10;
18689
18690 if (Subtarget.hasSPE())
18691 FlagSet |= PPC::MOF_SubtargetSPE;
18692
18693 // Check if we have a PCRel node and return early.
18694 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18695 return FlagSet;
18696
18697 // If the node is one of the paired load/store intrinsics, compute flags for
18698 // address computation and return early.
18699 unsigned ParentOp = Parent->getOpcode();
18700 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18701 (ParentOp == ISD::INTRINSIC_VOID))) {
18702 unsigned ID = Parent->getConstantOperandVal(1);
18703 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18704 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18705 ? Parent->getOperand(2)
18706 : Parent->getOperand(3);
18707 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18708 FlagSet |= PPC::MOF_Vector;
18709 return FlagSet;
18710 }
18711 }
18712
18713 // Mark this as something we don't want to handle here if it is an atomic
18714 // or pre-increment instruction.
18715 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18716 if (LSB->isIndexed())
18717 return PPC::MOF_None;
18718
18719 // Compute in-memory type flags. This is based on if there are scalars,
18720 // floats or vectors.
18721 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18722 assert(MN && "Parent should be a MemSDNode!");
18723 EVT MemVT = MN->getMemoryVT();
18724 unsigned Size = MemVT.getSizeInBits();
18725 if (MemVT.isScalarInteger()) {
18726 assert(Size <= 128 &&
18727 "Not expecting scalar integers larger than 16 bytes!");
18728 if (Size < 32)
18729 FlagSet |= PPC::MOF_SubWordInt;
18730 else if (Size == 32)
18731 FlagSet |= PPC::MOF_WordInt;
18732 else
18733 FlagSet |= PPC::MOF_DoubleWordInt;
18734 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18735 if (Size == 128)
18736 FlagSet |= PPC::MOF_Vector;
18737 else if (Size == 256) {
18738 assert(Subtarget.pairedVectorMemops() &&
18739 "256-bit vectors are only available when paired vector memops is "
18740 "enabled!");
18741 FlagSet |= PPC::MOF_Vector;
18742 } else
18743 llvm_unreachable("Not expecting illegal vectors!");
18744 } else { // Floating point type: can be scalar, f128 or vector types.
18745 if (Size == 32 || Size == 64)
18746 FlagSet |= PPC::MOF_ScalarFloat;
18747 else if (MemVT == MVT::f128 || MemVT.isVector())
18748 FlagSet |= PPC::MOF_Vector;
18749 else
18750 llvm_unreachable("Not expecting illegal scalar floats!");
18751 }
18752
18753 // Compute flags for address computation.
18754 computeFlagsForAddressComputation(N, FlagSet, DAG);
18755
18756 // Compute type extension flags.
18757 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18758 switch (LN->getExtensionType()) {
18759 case ISD::SEXTLOAD:
18760 FlagSet |= PPC::MOF_SExt;
18761 break;
18762 case ISD::EXTLOAD:
18763 case ISD::ZEXTLOAD:
18764 FlagSet |= PPC::MOF_ZExt;
18765 break;
18766 case ISD::NON_EXTLOAD:
18767 FlagSet |= PPC::MOF_NoExt;
18768 break;
18769 }
18770 } else
18771 FlagSet |= PPC::MOF_NoExt;
18772
18773 // For integers, no extension is the same as zero extension.
18774 // We set the extension mode to zero extension so we don't have
18775 // to add separate entries in AddrModesMap for loads and stores.
18776 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18777 FlagSet |= PPC::MOF_ZExt;
18778 FlagSet &= ~PPC::MOF_NoExt;
18779 }
18780
18781 // If we don't have prefixed instructions, 34-bit constants should be
18782 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18783 bool IsNonP1034BitConst =
18784 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18785 FlagSet) == PPC::MOF_RPlusSImm34;
18786 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18787 IsNonP1034BitConst)
18788 FlagSet |= PPC::MOF_NotAddNorCst;
18789
18790 return FlagSet;
18791}
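The in-memory type classification above keys off the access size in bits. A simplified standalone sketch of the scalar-integer branch (hypothetical helper, not LLVM code; the strings merely mirror the MOF_* spellings):

#include <cassert>
#include <string>

static std::string classifyScalarIntAccess(unsigned SizeInBits) {
  if (SizeInBits < 32)
    return "SubWordInt";    // byte and halfword accesses
  if (SizeInBits == 32)
    return "WordInt";
  return "DoubleWordInt";   // 64-bit and wider scalars (up to 128 bits)
}

int main() {
  assert(classifyScalarIntAccess(8) == "SubWordInt");
  assert(classifyScalarIntAccess(16) == "SubWordInt");
  assert(classifyScalarIntAccess(32) == "WordInt");
  assert(classifyScalarIntAccess(64) == "DoubleWordInt");
  return 0;
}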
18792
18793/// SelectForceXFormMode - Given the specified address, force it to be
18794 /// represented as an indexed [r+r] operation (an XForm instruction).
18795 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18796 SDValue &Base,
18797 SelectionDAG &DAG) const {
18798
18799 PPC::AddrMode Mode = PPC::AM_XForm;
18800 int16_t ForceXFormImm = 0;
18801 if (provablyDisjointOr(DAG, N) &&
18802 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18803 Disp = N.getOperand(0);
18804 Base = N.getOperand(1);
18805 return Mode;
18806 }
18807
18808 // If the address is the result of an add, we will utilize the fact that the
18809 // address calculation includes an implicit add. However, we can reduce
18810 // register pressure if we do not materialize a constant just for use as the
18811 // index register. We only get rid of the add if it is not an add of a
18812 // value and a 16-bit signed constant where both operands have a single use.
18813 if (N.getOpcode() == ISD::ADD &&
18814 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18815 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18816 Disp = N.getOperand(0);
18817 Base = N.getOperand(1);
18818 return Mode;
18819 }
18820
18821 // Otherwise, use R0 as the base register.
18822 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18823 N.getValueType());
18824 Base = N;
18825
18826 return Mode;
18827}
18828
18829 bool PPCTargetLowering::splitValueIntoRegisterParts(
18830 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18831 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18832 EVT ValVT = Val.getValueType();
18833 // If we are splitting a scalar integer into f64 parts (i.e. so they
18834 // can be placed into VFRC registers), we need to zero extend and
18835 // bitcast the values. This will ensure the value is placed into a
18836 // VSR using direct moves or stack operations as needed.
18837 if (PartVT == MVT::f64 &&
18838 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18839 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18840 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18841 Parts[0] = Val;
18842 return true;
18843 }
18844 return false;
18845}
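A host-side sketch (hypothetical, not LLVM code) of the value transformation above: the small integer is zero-extended to 64 bits and those raw bits are reinterpreted as an f64, so the integer payload survives the trip through a floating-point register unchanged.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Val32 = 0x12345678u;
  uint64_t Val64 = Val32;                       // ISD::ZERO_EXTEND to i64
  double AsF64;
  std::memcpy(&AsF64, &Val64, sizeof(AsF64));   // ISD::BITCAST to f64

  uint64_t RoundTrip;
  std::memcpy(&RoundTrip, &AsF64, sizeof(RoundTrip));
  assert(RoundTrip == Val64 && (uint32_t)RoundTrip == Val32);
  return 0;
}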
18846
18847SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18848 SelectionDAG &DAG) const {
18849 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18850 TargetLowering::CallLoweringInfo CLI(DAG);
18851 EVT RetVT = Op.getValueType();
18852 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18853 SDValue Callee =
18854 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18855 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
18856 TargetLowering::ArgListTy Args;
18857 TargetLowering::ArgListEntry Entry;
18858 for (const SDValue &N : Op->op_values()) {
18859 EVT ArgVT = N.getValueType();
18860 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18861 Entry.Node = N;
18862 Entry.Ty = ArgTy;
18863 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
18864 Entry.IsZExt = !Entry.IsSExt;
18865 Args.push_back(Entry);
18866 }
18867
18868 SDValue InChain = DAG.getEntryNode();
18869 SDValue TCChain = InChain;
18870 const Function &F = DAG.getMachineFunction().getFunction();
18871 bool isTailCall =
18872 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18873 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18874 if (isTailCall)
18875 InChain = TCChain;
18876 CLI.setDebugLoc(SDLoc(Op))
18877 .setChain(InChain)
18878 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18879 .setTailCall(isTailCall)
18880 .setSExtResult(SignExtend)
18881 .setZExtResult(!SignExtend)
18882 .setIsPostTypeLegalization(true);
18883 return TLI.LowerCallTo(CLI).first;
18884}
18885
18886SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18887 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18888 SelectionDAG &DAG) const {
18889 if (Op.getValueType() == MVT::f32)
18890 return lowerToLibCall(LibCallFloatName, Op, DAG);
18891
18892 if (Op.getValueType() == MVT::f64)
18893 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18894
18895 return SDValue();
18896}
18897
18898bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18899 SDNodeFlags Flags = Op.getNode()->getFlags();
18900 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18901 Flags.hasNoNaNs() && Flags.hasNoInfs();
18902}
18903
18904bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18905 return Op.getNode()->getFlags().hasApproximateFuncs();
18906}
18907
18908 bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18909 return TM.getOptLevel() == CodeGenOptLevel::Aggressive;
18910 }
18911
18912SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18913 const char *LibCallFloatName,
18914 const char *LibCallDoubleNameFinite,
18915 const char *LibCallFloatNameFinite,
18916 SDValue Op,
18917 SelectionDAG &DAG) const {
18918 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18919 return SDValue();
18920
18921 if (!isLowringToMASSFiniteSafe(Op))
18922 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18923 DAG);
18924
18925 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18926 LibCallDoubleNameFinite, Op, DAG);
18927}
18928
18929SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18930 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18931 "__xl_powf_finite", Op, DAG);
18932}
18933
18934SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18935 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18936 "__xl_sinf_finite", Op, DAG);
18937}
18938
18939SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18940 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18941 "__xl_cosf_finite", Op, DAG);
18942}
18943
18944SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18945 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18946 "__xl_logf_finite", Op, DAG);
18947}
18948
18949SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18950 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18951 "__xl_log10f_finite", Op, DAG);
18952}
18953
18954SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18955 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18956 "__xl_expf_finite", Op, DAG);
18957}
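For orientation, a hedged example of user code that could reach the lowerPow/lowerSin/... entry points above: when scalar MASS conversion is enabled (aggressive optimization) and the call carries the approximate-functions fast-math flag, a pow call may be redirected to __xl_powf, and with the additional no-NaN/no-Inf/no-signed-zero flags to __xl_powf_finite. The compiler options named in the comment are illustrative assumptions, not a guarantee of the exact lowering.

// Hypothetical user code; built with e.g. -Ofast on a PowerPC target, the
// pow call below may be lowered through the routines above to a MASS entry
// point such as __xl_powf_finite instead of the libm symbol.
#include <cmath>

float scaled_power(float Base, float Exponent) {
  return std::pow(Base, Exponent);
}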
18958
18959// If we happen to match to an aligned D-Form, check if the Frame Index is
18960// adequately aligned. If it is not, reset the mode to match to X-Form.
18961static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18962 PPC::AddrMode &Mode) {
18963 if (!isa<FrameIndexSDNode>(N))
18964 return;
18965 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18966 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18967 Mode = PPC::AM_XForm;
18968}
18969
18970 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18971/// compute the address flags of the node, get the optimal address mode based
18972 /// on the flags, and set the Base and Disp based on the address mode.
18973 PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
18974 SDValue N, SDValue &Disp,
18975 SDValue &Base,
18976 SelectionDAG &DAG,
18977 MaybeAlign Align) const {
18978 SDLoc DL(Parent);
18979
18980 // Compute the address flags.
18981 unsigned Flags = computeMOFlags(Parent, N, DAG);
18982
18983 // Get the optimal address mode based on the Flags.
18984 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18985
18986 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18987 // Select an X-Form load if it is not.
18988 setXFormForUnalignedFI(N, Flags, Mode);
18989
18990 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18991 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18992 assert(Subtarget.isUsingPCRelativeCalls() &&
18993 "Must be using PC-Relative calls when a valid PC-Relative node is "
18994 "present!");
18995 Mode = PPC::AM_PCRel;
18996 }
18997
18998 // Set Base and Disp accordingly depending on the address mode.
18999 switch (Mode) {
19000 case PPC::AM_DForm:
19001 case PPC::AM_DSForm:
19002 case PPC::AM_DQForm: {
19003 // This is a register plus a 16-bit immediate. The base will be the
19004 // register and the displacement will be the immediate unless it
19005 // isn't sufficiently aligned.
19006 if (Flags & PPC::MOF_RPlusSImm16) {
19007 SDValue Op0 = N.getOperand(0);
19008 SDValue Op1 = N.getOperand(1);
19009 int16_t Imm = Op1->getAsZExtVal();
19010 if (!Align || isAligned(*Align, Imm)) {
19011 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
19012 Base = Op0;
19013 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
19014 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19015 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19016 }
19017 break;
19018 }
19019 }
19020 // This is a register plus the @lo relocation. The base is the register
19021 // and the displacement is the global address.
19022 else if (Flags & PPC::MOF_RPlusLo) {
19023 Disp = N.getOperand(1).getOperand(0); // The global address.
19024 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
19025 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
19026 Disp.getOpcode() == ISD::TargetConstantPool ||
19027 Disp.getOpcode() == ISD::TargetJumpTable);
19028 Base = N.getOperand(0);
19029 break;
19030 }
19031 // This is a constant address at most 32 bits. The base will be
19032 // zero or load-immediate-shifted and the displacement will be
19033 // the low 16 bits of the address.
19034 else if (Flags & PPC::MOF_AddrIsSImm32) {
19035 auto *CN = cast<ConstantSDNode>(N);
19036 EVT CNType = CN->getValueType(0);
19037 uint64_t CNImm = CN->getZExtValue();
19038 // If this address fits entirely in a 16-bit sext immediate field, codegen
19039 // this as "d, 0".
19040 int16_t Imm;
19041 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
19042 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
19043 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19044 CNType);
19045 break;
19046 }
19047 // Handle 32-bit sext immediate with LIS + Addr mode.
19048 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
19049 (!Align || isAligned(*Align, CNImm))) {
19050 int32_t Addr = (int32_t)CNImm;
19051 // Otherwise, break this down into LIS + Disp.
19052 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
19053 Base =
19054 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
19055 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
19056 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
19057 break;
19058 }
19059 }
19060 // Otherwise, the PPC::MOF_NotAddNorCst flag is set. Load/Store is Non-foldable.
19061 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
19062 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
19063 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19064 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19065 } else
19066 Base = N;
19067 break;
19068 }
19069 case PPC::AM_PrefixDForm: {
19070 int64_t Imm34 = 0;
19071 unsigned Opcode = N.getOpcode();
19072 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
19073 (isIntS34Immediate(N.getOperand(1), Imm34))) {
19074 // N is an Add/OR Node, and its operand is a 34-bit signed immediate.
19075 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19076 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
19077 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19078 else
19079 Base = N.getOperand(0);
19080 } else if (isIntS34Immediate(N, Imm34)) {
19081 // The address is a 34-bit signed immediate.
19082 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19083 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
19084 }
19085 break;
19086 }
19087 case PPC::AM_PCRel: {
19088 // When selecting PC-Relative instructions, "Base" is not utilized as
19089 // we select the address as [PC+imm].
19090 Disp = N;
19091 break;
19092 }
19093 case PPC::AM_None:
19094 break;
19095 default: { // By default, X-Form is always available to be selected.
19096 // When a frame index is not aligned, we also match by XForm.
19097 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
19098 Base = FI ? N : N.getOperand(1);
19099 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19100 N.getValueType())
19101 : N.getOperand(0);
19102 break;
19103 }
19104 }
19105 return Mode;
19106}
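A worked sketch of the LIS + Disp decomposition used in the AM_DForm constant case above: a 32-bit address splits into a sign-extended low 16-bit displacement and a high part that LIS materializes; subtracting the sign-extended low half before shifting compensates for the displacement's sign extension. Standalone code, not part of the LLVM source.

#include <cassert>
#include <cstdint>

int main() {
  int32_t Addr = 0x12348765;            // example 32-bit address
  int16_t Disp = (int16_t)Addr;         // low 16 bits, sign-extended (negative here)
  int32_t HiVal = (Addr - Disp) >> 16;  // value LIS places in the upper halfword
  // Re-assembling LIS's result with the signed displacement restores Addr.
  assert((HiVal << 16) + Disp == Addr);
  return 0;
}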
19107
19108 CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
19109 bool Return,
19110 bool IsVarArg) const {
19111 switch (CC) {
19112 case CallingConv::Cold:
19113 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
19114 default:
19115 return CC_PPC64_ELF;
19116 }
19117}
19118
19119 bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
19120 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
19121}
19122 }
19123
19124 TargetLowering::AtomicExpansionKind
19124 PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
19125 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
19126 if (shouldInlineQuadwordAtomics() && Size == 128)
19127 return AtomicExpansionKind::MaskedIntrinsic;
19128
19129 switch (AI->getOperation()) {
19130 case AtomicRMWInst::UIncWrap:
19131 case AtomicRMWInst::UDecWrap:
19132 case AtomicRMWInst::USubCond:
19133 case AtomicRMWInst::USubSat:
19134 return AtomicExpansionKind::CmpXChg;
19135 default:
19136 return TargetLoweringBase::shouldExpandAtomicRMWInIR(AI);
19137 }
19138
19139 llvm_unreachable("unreachable atomicrmw operation");
19140}
19141
19142 TargetLowering::AtomicExpansionKind
19143 PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
19144 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
19145 if (shouldInlineQuadwordAtomics() && Size == 128)
19146 return AtomicExpansionKind::MaskedIntrinsic;
19147 return TargetLoweringBase::shouldExpandAtomicCmpXchgInIR(AI);
19148 }
19149
19150 static Intrinsic::ID
19151 getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
19152 switch (BinOp) {
19153 default:
19154 llvm_unreachable("Unexpected AtomicRMW BinOp");
19155 case AtomicRMWInst::Xchg:
19156 return Intrinsic::ppc_atomicrmw_xchg_i128;
19157 case AtomicRMWInst::Add:
19158 return Intrinsic::ppc_atomicrmw_add_i128;
19159 case AtomicRMWInst::Sub:
19160 return Intrinsic::ppc_atomicrmw_sub_i128;
19161 case AtomicRMWInst::And:
19162 return Intrinsic::ppc_atomicrmw_and_i128;
19163 case AtomicRMWInst::Or:
19164 return Intrinsic::ppc_atomicrmw_or_i128;
19165 case AtomicRMWInst::Xor:
19166 return Intrinsic::ppc_atomicrmw_xor_i128;
19167 case AtomicRMWInst::Nand:
19168 return Intrinsic::ppc_atomicrmw_nand_i128;
19169 }
19170}
19171
19172 Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
19173 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
19174 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
19175 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19176 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19177 Type *ValTy = Incr->getType();
19178 assert(ValTy->getPrimitiveSizeInBits() == 128);
19179 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19180 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
19181 Value *IncrHi =
19182 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
19183 Value *LoHi = Builder.CreateIntrinsic(
19184 getIntrinsicForAtomicRMWBinOp128(AI->getOperation()), {},
19185 {AlignedAddr, IncrLo, IncrHi});
19186 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19187 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19188 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19189 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19190 return Builder.CreateOr(
19191 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19192}
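A standalone sketch of the value plumbing above (hypothetical; uses the GCC/Clang __int128 extension on a 64-bit host and is not LLVM code): the 128-bit operand is truncated and shifted into two 64-bit halves for the i128 intrinsic, and the returned halves are zero-extended, shifted, and OR'd back into a 128-bit value.

#include <cassert>
#include <cstdint>

int main() {
  unsigned __int128 Incr =
      ((unsigned __int128)0x0123456789abcdefULL << 64) | 0xfedcba9876543210ULL;

  uint64_t IncrLo = (uint64_t)Incr;         // CreateTrunc(Incr, i64)
  uint64_t IncrHi = (uint64_t)(Incr >> 64); // CreateTrunc(CreateLShr(Incr, 64), i64)

  // Recombine as the lowering does: zext both halves, shift the high half
  // left by 64, and OR them together.
  unsigned __int128 Rebuilt =
      (unsigned __int128)IncrLo | ((unsigned __int128)IncrHi << 64);
  assert(Rebuilt == Incr);
  return 0;
}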
19193
19194 Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
19195 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
19196 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
19197 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19198 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19199 Type *ValTy = CmpVal->getType();
19200 assert(ValTy->getPrimitiveSizeInBits() == 128);
19201 Function *IntCmpXchg =
19202 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
19203 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19204 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
19205 Value *CmpHi =
19206 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
19207 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
19208 Value *NewHi =
19209 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
19210 emitLeadingFence(Builder, CI, Ord);
19211 Value *LoHi =
19212 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
19213 emitTrailingFence(Builder, CI, Ord);
19214 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19215 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19216 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19217 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19218 return Builder.CreateOr(
19219 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19220}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
#define Success
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
@ OP_COPY
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition: IVUsers.cpp:48
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isConstantOrUndef(const SDValue Op)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
static constexpr Register SPReg
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5463
bool isDenormal() const
Definition: APFloat.h:1446
APInt bitcastToAPInt() const
Definition: APFloat.h:1351
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1407
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
APInt abs() const
Get the absolute value.
Definition: APInt.h:1773
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:471
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1700
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ Xor
*p = old ^ v
Definition: Instructions.h:730
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
BinOp getOperation() const
Definition: Instructions.h:805
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:893
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1112
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1341
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1881
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1399
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1261
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1334
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1267
unsigned arg_size() const
Definition: InstrTypes.h:1284
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:197
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:873
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:851
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:843
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:457
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:707
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition: Function.cpp:373
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:778
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
arg_iterator arg_begin()
Definition: Function.h:868
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
size_t arg_size() const
Definition: Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:234
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:595
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:264
void setThreadLocalMode(ThreadLocalMode Val)
Definition: GlobalValue.h:268
bool hasHiddenVisibility() const
Definition: GlobalValue.h:251
StringRef getSection() const
Definition: Globals.cpp:189
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:632
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:130
bool hasComdat() const
Definition: GlobalValue.h:242
Type * getValueType() const
Definition: GlobalValue.h:297
bool hasProtectedVisibility() const
Definition: GlobalValue.h:252
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1480
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1459
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2033
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1540
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:176
bool isUnordered() const
Definition: Instructions.h:249
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Metadata node.
Definition: Metadata.h:1073
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
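As a rough sketch of the MachineInstrBuilder chaining style listed above; the opcodes, registers and helper name are placeholders (it assumes the in-tree PPC backend headers are available), not an excerpt from this file:

#include "PPCInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

// Emit "DstReg = ADDI SrcReg, 42" followed by an unconditional branch to
// TargetMBB, both inserted immediately before MI.
static void buildInstrSketch(MachineBasicBlock &MBB, MachineInstr &MI,
                             const TargetInstrInfo *TII, const DebugLoc &DL,
                             Register DstReg, Register SrcReg,
                             MachineBasicBlock *TargetMBB) {
  BuildMI(MBB, MI, DL, TII->get(PPC::ADDI), DstReg)
      .addReg(SrcReg)
      .addImm(42);
  BuildMI(MBB, MI, DL, TII->get(PPC::B)).addMBB(TargetMBB);
}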
Representation of each machine instruction.
Definition: MachineInstr.h:69
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
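A small hedged sketch combining the MachineRegisterInfo calls above with MachineFunction::addLiveIn; PPC::R3 and PPC::GPRCRegClass are merely example choices and the helper is invented:

#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Mark an argument physreg live-in, obtain its virtual register, and create a
// second virtual register in the same class.
static Register liveInSketch(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register ArgVReg = MF.addLiveIn(PPC::R3, &PPC::GPRCRegClass);
  Register Tmp = MRI.createVirtualRegister(MRI.getRegClass(ArgVReg));
  (void)MRI.getVRegDef(ArgVReg); // null until an instruction defines it
  return Tmp;
}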
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
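A hedged sketch of the kind of query a combine makes on a MemSDNode before rewriting it; the predicate itself is invented for illustration:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// True for a non-volatile memory operation on at most 32 bits whose natural
// alignment is below 4 bytes.
static bool isNarrowUnderalignedMemOp(const MemSDNode *MemN) {
  EVT MemVT = MemN->getMemoryVT();
  MachineMemOperand *MMO = MemN->getMemOperand();
  return MemVT.getScalarSizeInBits() <= 32 &&
         MemN->getAlign().value() < 4 && !MMO->isVolatile();
}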
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class holds private PowerPC target-specific information for each MachineFunction.
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
bool isAIXFuncTLSModelOptInitDone() const
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:304
bool is32BitELFABI() const
Definition: PPCSubtarget.h:224
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:266
MVT getScalarIntVT() const
Definition: PPCSubtarget.h:253
bool isAIXABI() const
Definition: PPCSubtarget.h:219
bool useSoftFloat() const
Definition: PPCSubtarget.h:179
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:147
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:207
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:260
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:278
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:150
bool isSVR4ABI() const
Definition: PPCSubtarget.h:220
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:139
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:211
bool isLittleEndian() const
Definition: PPCSubtarget.h:186
bool isTargetLinux() const
Definition: PPCSubtarget.h:217
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:284
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:296
bool is64BitELFABI() const
Definition: PPCSubtarget.h:223
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:160
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:302
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:157
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:272
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
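A hedged skeleton of the shape a PerformDAGCombine-style hook usually takes (dispatch on the node's opcode, return an empty SDValue when nothing changed so generic combining continues); the (add x, 0) rewrite is only an example, not one of this file's combines:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static SDValue combineSketch(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  switch (N->getOpcode()) {
  default:
    break;
  case ISD::ADD:
    // Example rewrite: (add x, 0) -> x, attempted only late in combining.
    if (DCI.isAfterLegalizeDAG() && isNullConstant(N->getOperand(1)))
      return N->getOperand(0);
    break;
  }
  return SDValue(); // no change; generic combining continues
}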
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc-relative, i.e. as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:686
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
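For orientation, a hedged sketch using the SDNode/SDValue accessors above; the single-use shift-by-constant pattern it tests is purely illustrative:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Match a single-use (shl X, C) and report the constant shift amount.
static bool isSingleUseShlByConstant(SDValue V, uint64_t &ShAmt) {
  if (V.getOpcode() != ISD::SHL || !V.hasOneUse())
    return false;
  if (!isa<ConstantSDNode>(V.getOperand(1)))
    return false;
  ShAmt = V.getConstantOperandVal(1);
  return true;
}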
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:750
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:499
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:801
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:458
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:760
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:856
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:827
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:755
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:712
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
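A minimal hedged sketch chaining getLoad and getStore as described above; the pointer arithmetic and the empty MachinePointerInfo are simplifications for illustration:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Load an i32 from Ptr and store it back 4 bytes higher, threading the chain
// through the load's output chain (value number 1).
static SDValue copyWordSketch(SelectionDAG &DAG, const SDLoc &dl,
                              SDValue Chain, SDValue Ptr, EVT PtrVT) {
  SDValue Val = DAG.getLoad(MVT::i32, dl, Chain, Ptr, MachinePointerInfo());
  SDValue NewPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
                               DAG.getConstant(4, dl, PtrVT));
  return DAG.getStore(Val.getValue(1), dl, Val, NewPtr, MachinePointerInfo());
}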
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:700
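A hedged sketch that uses getConstant, getSetCC and getSelect together; clampToZeroSketch is an invented helper and VT is assumed to be a legal integer type:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Build (A < 0) ? 0 : A using the target's preferred SETCC result type.
static SDValue clampToZeroSketch(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
                                 SDValue A) {
  SDValue Zero = DAG.getConstant(0, dl, VT);
  EVT CCVT = DAG.getTargetLoweringInfo().getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue IsNeg = DAG.getSetCC(dl, CCVT, A, Zero, ISD::SETLT);
  return DAG.getSelect(dl, VT, IsNeg, Zero, A);
}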
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:796
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:873
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
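A hedged sketch of two equivalent ways to ask the same bit-level question with computeKnownBits and MaskedValueIsZero; it assumes Op is an i64 value:

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Return true when the top 32 bits of a 64-bit Op are known to be zero.
static bool topHalfKnownZero(SelectionDAG &DAG, SDValue Op) {
  KnownBits Known = DAG.computeKnownBits(Op);
  if (Known.countMinLeadingZeros() >= 32)
    return true;
  return DAG.MaskedValueIsZero(
      Op, APInt::getHighBitsSet(/*numBits=*/64, /*hiBitsSet=*/32));
}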
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:767
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
void clear()
Definition: SmallSet.h:204
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
Class to represent struct types.
Definition: DerivedTypes.h:218
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
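The configuration hooks above are protected members meant to be called from a TargetLowering subclass constructor, so a standalone function cannot exercise them; the fragment below is only a hedged sketch of that constructor-time shape, with placeholder choices rather than PPC's actual settings (Subtarget is assumed to be the usual subtarget member):

// Inside a TargetLowering subclass constructor:
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);      // i32 values live in GPRs
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);  // no combined div/rem node
setOperationAction(ISD::BR_JT, MVT::Other, Custom);  // custom jump-table branch
setTruncStoreAction(MVT::f64, MVT::f32, Expand);     // no truncating f64 store
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTargetDAGCombine({ISD::ADD, ISD::SHL});           // request combine callbacks
computeRegisterProperties(Subtarget.getRegisterInfo());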
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
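A hedged sketch of driving makeLibCall for a soft-float style operation; the RTLIB entry and the helper name are illustrative and do not describe this file's lowering:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Lower an f128 add to a runtime call; Res.first is the value, Res.second the
// output chain.
static SDValue softFloatAddSketch(const TargetLowering &TLI, SelectionDAG &DAG,
                                  const SDLoc &dl, SDValue LHS, SDValue RHS,
                                  SDValue Chain) {
  TargetLowering::MakeLibCallOptions CallOptions;
  SDValue Ops[] = {LHS, RHS};
  std::pair<SDValue, SDValue> Res = TLI.makeLibCall(
      DAG, RTLIB::ADD_F128, MVT::f128, Ops, CallOptions, dl, Chain);
  return Res.first;
}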
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:310
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:255
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ TargetConstantPool
Definition: ISDOpcodes.h:174
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ STRICT_FCEIL
Definition: ISDOpcodes.h:441
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1270
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:936
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ TargetExternalSymbol
Definition: ISDOpcodes.h:175
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1118
@ TargetJumpTable
Definition: ISDOpcodes.h:173
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:170
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:445
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:931
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:439
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:440
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ STRICT_FROUND
Definition: ISDOpcodes.h:443
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:442
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1168
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1165
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:438
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1211
@ STRICT_FRINT
Definition: ISDOpcodes.h:437
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1398
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1276
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:171
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
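As a rough, illustrative sketch (not code from this file), the generic ISD opcodes above are combined with SelectionDAG::getNode when building or rewriting DAG nodes during lowering; DAG, dl, VT, LHS, and RHS are assumed to be supplied by the surrounding code.
  // Build a bitwise AND of two operands, then widen the result to i64.
  SDValue Masked = DAG.getNode(ISD::AND, dl, VT, LHS, RHS);
  SDValue Wide   = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Masked);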
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1498
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1643
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1559
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1610
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1590
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1649
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
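A minimal sketch, not taken from this file, of how the load predicates above are typically combined to classify a load node before deciding how to transform it; N is assumed to be an unindexed load node.
  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;
  // Classify an unindexed load node by its extension kind.
  static ISD::LoadExtType classifyLoad(const SDNode *N) {
    assert(ISD::isUNINDEXEDLoad(N) && "expected an unindexed load");
    if (ISD::isSEXTLoad(N))
      return ISD::SEXTLOAD;   // sign-extending load
    if (ISD::isZEXTLoad(N))
      return ISD::ZEXTLOAD;   // zero-extending load
    if (ISD::isEXTLoad(N))
      return ISD::EXTLOAD;    // any-extending load
    return ISD::NON_EXTLOAD;  // ISD::isNormalLoad(N) holds for this case
  }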
ID ArrayRef< Type * > Tys
Definition: Intrinsics.h:102
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
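A minimal, hedged sketch of how Intrinsic::getOrInsertDeclaration is typically used: look up (or create) the declaration of an overloaded intrinsic in a module and emit a call to it. M (a Module), Builder (an IRBuilder<>), and Val (a floating-point Value*) are assumed to exist in the caller.
  // Declaration of llvm.sqrt overloaded on Val's type, then a call to it.
  Function *Sqrt = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::sqrt,
                                                     {Val->getType()});
  Value *Root = Builder.CreateCall(Sqrt, {Val});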
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:96
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition: PPC.h:144
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:192
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:195
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:170
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:201
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:152
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:119
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:148
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:198
@ MO_TPREL_HA
Definition: PPC.h:177
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:111
@ MO_TLS
Symbol for VK_PPC_TLS fixup attached to an ADD instruction.
Definition: PPC.h:186
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:138
@ MO_TPREL_LO
Definition: PPC.h:176
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:173
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:164
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:189
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:133
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:158
@ MO_HA
Definition: PPC.h:174
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:115
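Illustrative sketch only: target-operand flags such as the MO_* values above are attached when target symbol nodes are created during lowering, for example when splitting a thread-local address into high-adjusted and low halves. DAG, GV, dl, and PtrVT are assumed to come from the surrounding lowering code.
  // Target global address nodes carrying @tprel@ha / @tprel@l style flags.
  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, /*Offset=*/0,
                                             PPCII::MO_TPREL_HA);
  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, /*Offset=*/0,
                                             PPCII::MO_TPREL_LO);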
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]CX. that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ SETBC
SETBC - The ISA 3.1 (P10) SETBC instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ SETBCR
SETBCR - The ISA 3.1 (P10) SETBCR instruction.
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instructions such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
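A hedged sketch of the Hi/Lo address materialization these nodes describe, assuming the usual two-operand form; DAG, dl, PtrVT, TGAHi, TGALo, and Zero are assumed from the surrounding code.
  SDValue Hi   = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, Zero); // ha16(sym)
  SDValue Lo   = DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Zero); // lo16(sym)
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);        // full address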
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:65
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
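A hedged sketch of how the shuffle-mask predicates above are typically consumed when lowering a VECTOR_SHUFFLE; SVOp (a ShuffleVectorSDNode*) and DAG are assumed to be provided by the caller.
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (PPC::isSplatShuffleMask(SVOp, /*EltSize=*/4)) {
    // SplatIdx is the element a vspltw/xxspltw-style splat would select.
    unsigned SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
    (void)SplatIdx;
  }
  unsigned ShiftElts, InsertAtByte;
  bool Swap;
  if (PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, IsLE)) {
    // The shuffle can be matched to a single XXINSERTW instruction.
  }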
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:105
@ XTY_ER
External reference.
Definition: XCOFF.h:241
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:48
const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:298
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
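As a final illustrative sketch, helpers such as isIntS16Immediate, isPowerOf2_64, and countr_zero above are often combined to recognize a small power-of-two immediate; N is assumed to be the SDNode* for a candidate operand.
  int16_t Imm;
  if (isIntS16Immediate(N, Imm) && Imm > 0 && isPowerOf2_64(uint64_t(Imm))) {
    unsigned ShiftAmt = llvm::countr_zero(uint64_t(Imm)); // log2 of the immediate
    // A multiply by Imm could instead be lowered as a left shift by ShiftAmt.
  }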