1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCCState.h"
18#include "PPCCallingConv.h"
19#include "PPCFrameLowering.h"
20#include "PPCInstrInfo.h"
22#include "PPCPerfectShuffle.h"
23#include "PPCRegisterInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallSet.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
60#include "llvm/IR/CallingConv.h"
61#include "llvm/IR/Constant.h"
62#include "llvm/IR/Constants.h"
63#include "llvm/IR/DataLayout.h"
64#include "llvm/IR/DebugLoc.h"
66#include "llvm/IR/Function.h"
67#include "llvm/IR/GlobalValue.h"
68#include "llvm/IR/IRBuilder.h"
70#include "llvm/IR/Intrinsics.h"
71#include "llvm/IR/IntrinsicsPowerPC.h"
72#include "llvm/IR/Module.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/Value.h"
76#include "llvm/MC/MCContext.h"
77#include "llvm/MC/MCExpr.h"
87#include "llvm/Support/Debug.h"
89#include "llvm/Support/Format.h"
95#include <algorithm>
96#include <cassert>
97#include <cstdint>
98#include <iterator>
99#include <list>
100#include <optional>
101#include <utility>
102#include <vector>
103
104using namespace llvm;
105
106#define DEBUG_TYPE "ppc-lowering"
107
108static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
109cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
110
111static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
112cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
113
114static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
115cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
116
117static cl::opt<bool> DisableSCO("disable-ppc-sco",
118cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
119
120static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
121cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
122
123static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
124cl::desc("use absolute jump tables on ppc"), cl::Hidden);
125
126static cl::opt<bool>
127 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
128 cl::desc("disable vector permute decomposition"),
129 cl::init(true), cl::Hidden);
130
132 "disable-auto-paired-vec-st",
133 cl::desc("disable automatically generated 32byte paired vector stores"),
134 cl::init(true), cl::Hidden);
135
137 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
138 cl::desc("Set minimum number of entries to use a jump table on PPC"));
139
141 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
142 cl::desc("max depth when checking alias info in GatherAllAliases()"));
143
145 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
146 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
147 "function to use initial-exec"));
148
149STATISTIC(NumTailCalls, "Number of tail calls");
150STATISTIC(NumSiblingCalls, "Number of sibling calls");
151STATISTIC(ShufflesHandledWithVPERM,
152 "Number of shuffles lowered to a VPERM or XXPERM");
153STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
154
155static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
156
157static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
158
159static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
160
161// A faster local-[exec|dynamic] TLS access sequence (enabled with the
162// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
163// variables; consistent with the IBM XL compiler, we apply a max size of
164// slightly under 32KB.
166
167// FIXME: Remove this once the bug has been fixed!
169
171 const PPCSubtarget &STI)
172 : TargetLowering(TM), Subtarget(STI) {
173 // Initialize map that relates the PPC addressing modes to the computed flags
174 // of a load/store instruction. The map is used to determine the optimal
175 // addressing mode when selecting loads and stores.
176 initializeAddrModeMap();
177 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
178 // arguments are at least 4/8 bytes aligned.
179 bool isPPC64 = Subtarget.isPPC64();
180 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
181
182 // Set up the register classes.
183 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
184 if (!useSoftFloat()) {
185 if (hasSPE()) {
186 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
187 // EFPU2 APU only supports f32
188 if (!Subtarget.hasEFPU2())
189 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
190 } else {
191 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
192 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
193 }
194 }
195
196 // Match BITREVERSE to customized fast code sequence in the td file.
199
200 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
202
203 // Custom lower inline assembly to check for special registers.
206
207 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
208 for (MVT VT : MVT::integer_valuetypes()) {
211 }
212
213 if (Subtarget.isISA3_0()) {
214 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
215 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
216 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
217 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
218 } else {
219 // No extending loads from f16 or HW conversions back and forth.
220 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
223 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
226 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
227 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
228 }
229
230 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
231
232 // PowerPC has pre-inc loads and stores.
243 if (!Subtarget.hasSPE()) {
248 }
249
250 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
251 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
252 for (MVT VT : ScalarIntVTs) {
257 }
258
259 if (Subtarget.useCRBits()) {
261
262 if (isPPC64 || Subtarget.hasFPCVT()) {
265 isPPC64 ? MVT::i64 : MVT::i32);
268 isPPC64 ? MVT::i64 : MVT::i32);
269
272 isPPC64 ? MVT::i64 : MVT::i32);
275 isPPC64 ? MVT::i64 : MVT::i32);
276
279 isPPC64 ? MVT::i64 : MVT::i32);
282 isPPC64 ? MVT::i64 : MVT::i32);
283
286 isPPC64 ? MVT::i64 : MVT::i32);
289 isPPC64 ? MVT::i64 : MVT::i32);
290 } else {
295 }
296
297 // PowerPC does not support direct load/store of condition registers.
300
301 // FIXME: Remove this once the ANDI glue bug is fixed:
302 if (ANDIGlueBug)
304
305 for (MVT VT : MVT::integer_valuetypes()) {
308 setTruncStoreAction(VT, MVT::i1, Expand);
309 }
310
311 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
312 }
313
314 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
315 // PPC (the libcall is not available).
320
321 // We do not currently implement these libm ops for PowerPC.
322 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
323 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
324 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
325 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
327 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
328
329 // PowerPC has no SREM/UREM instructions unless we are on P9.
330 // On P9 we may use a hardware instruction to compute the remainder.
331 // When the result of both the remainder and the division is required it is
332 // more efficient to compute the remainder from the result of the division
333 // rather than use the remainder instruction. The instructions are legalized
334 // directly because the DivRemPairsPass performs the transformation at the IR
335 // level.
336 if (Subtarget.isISA3_0()) {
341 } else {
346 }
347
348 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
357
358 // Handle constrained floating-point operations of scalar.
359 // TODO: Handle SPE-specific operations.
365
370
371 if (!Subtarget.hasSPE()) {
374 }
375
376 if (Subtarget.hasVSX()) {
379 }
380
381 if (Subtarget.hasFSQRT()) {
384 }
385
386 if (Subtarget.hasFPRND()) {
391
396 }
397
398 // We don't support sin/cos/sqrt/fmod/pow
409
410 // MASS transformation for LLVM intrinsics with replicating fast-math flag
411 // to be consistent with the PPCGenScalarMASSEntries pass
412 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
425 }
426
427 if (Subtarget.hasSPE()) {
430 } else {
431 setOperationAction(ISD::FMA , MVT::f64, Legal);
432 setOperationAction(ISD::FMA , MVT::f32, Legal);
433 }
434
435 if (Subtarget.hasSPE())
436 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
437
439
440 // If we're enabling GP optimizations, use hardware square root
441 if (!Subtarget.hasFSQRT() &&
442 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
443 Subtarget.hasFRE()))
445
446 if (!Subtarget.hasFSQRT() &&
447 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
448 Subtarget.hasFRES()))
450
451 if (Subtarget.hasFCPSGN()) {
454 } else {
457 }
458
459 if (Subtarget.hasFPRND()) {
464
469 }
470
471 // Prior to P10, PowerPC does not have BSWAP, but we can use the vector BSWAP
472 // instruction xxbrd to speed up scalar BSWAP64.
473 if (Subtarget.isISA3_1()) {
476 } else {
479 ISD::BSWAP, MVT::i64,
480 (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
481 }
482
483 // CTPOP or CTTZ were introduced in P8/P9 respectively
484 if (Subtarget.isISA3_0()) {
485 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
486 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
487 } else {
488 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
489 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
490 }
491
492 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
495 } else {
498 }
499
500 // PowerPC does not have ROTR
503
504 if (!Subtarget.useCRBits()) {
505 // PowerPC does not have Select
510 }
511
512 // PowerPC wants to turn select_cc of FP into fsel when possible.
515
516 // PowerPC wants to optimize integer setcc a bit
517 if (!Subtarget.useCRBits())
519
520 if (Subtarget.hasFPU()) {
524
528 }
529
530 // PowerPC does not have BRCOND which requires SetCC
531 if (!Subtarget.useCRBits())
533
535
536 if (Subtarget.hasSPE()) {
537 // SPE has built-in conversions
544
545 // SPE supports signaling compare of f32/f64.
548 } else {
549 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
552
553 // PowerPC does not have [U|S]INT_TO_FP
558 }
559
560 if (Subtarget.hasDirectMove() && isPPC64) {
565 if (TM.Options.UnsafeFPMath) {
574 }
575 } else {
580 }
581
582 // We cannot sextinreg(i1). Expand to shifts.
584
585 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
586 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
587 // support continuation, user-level threading, etc. As a result, no
588 // other SjLj exception interfaces are implemented and please don't build
589 // your own exception handling based on them.
590 // LLVM/Clang supports zero-cost DWARF exception handling.
593
594 // We want to legalize GlobalAddress and ConstantPool nodes into the
595 // appropriate instructions to materialize the address.
606
607 // TRAP is legal.
608 setOperationAction(ISD::TRAP, MVT::Other, Legal);
609
610 // TRAMPOLINE is custom lowered.
613
614 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
616
617 if (Subtarget.is64BitELFABI()) {
618 // VAARG always uses double-word chunks, so promote anything smaller.
620 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
622 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
624 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
626 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
628 } else if (Subtarget.is32BitELFABI()) {
629 // VAARG is custom lowered with the 32-bit SVR4 ABI.
632 } else
634
635 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
636 if (Subtarget.is32BitELFABI())
638 else
640
641 // Use the default implementation.
642 setOperationAction(ISD::VAEND , MVT::Other, Expand);
651
652 // We want to custom lower some of our intrinsics.
658
659 // To handle counter-based loop conditions.
661
666
667 // Comparisons that require checking two conditions.
668 if (Subtarget.hasSPE()) {
673 }
686
689
690 if (Subtarget.has64BitSupport()) {
691 // They also have instructions for converting between i64 and fp.
700 // This is just the low 32 bits of a (signed) fp->i64 conversion.
701 // We cannot do this with Promote because i64 is not a legal type.
704
705 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
708 }
709 } else {
710 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
711 if (Subtarget.hasSPE()) {
714 } else {
717 }
718 }
719
720 // With the instructions enabled under FPCVT, we can do everything.
721 if (Subtarget.hasFPCVT()) {
722 if (Subtarget.has64BitSupport()) {
731 }
732
741 }
742
743 if (Subtarget.use64BitRegs()) {
744 // 64-bit PowerPC implementations can support i64 types directly
745 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
746 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
748 // 64-bit PowerPC wants to expand i128 shifts itself.
752 } else {
753 // 32-bit PowerPC wants to expand i64 shifts itself.
757 }
758
759 // PowerPC has better expansions for funnel shifts than the generic
760 // TargetLowering::expandFunnelShift.
761 if (Subtarget.has64BitSupport()) {
764 }
767
768 if (Subtarget.hasVSX()) {
773 }
774
775 if (Subtarget.hasAltivec()) {
776 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
781 }
782 // First set operation action for all vector types to expand. Then we
783 // will selectively turn on ones that can be effectively codegen'd.
785 // add/sub are legal for all supported vector VT's.
788
789 // For v2i64, these are only valid with P8Vector. This is corrected after
790 // the loop.
791 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
796 }
797 else {
802 }
803
804 if (Subtarget.hasVSX()) {
807 }
808
809 // Vector instructions introduced in P8
810 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
813 }
814 else {
817 }
818
819 // Vector instructions introduced in P9
820 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
822 else
824
825 // We promote all shuffles to v16i8.
827 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
828
829 // We promote all non-typed operations to v4i32.
831 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
833 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
835 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
837 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
839 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
842 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
844 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
845
846 // No other operations are legal.
885
886 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
887 setTruncStoreAction(VT, InnerVT, Expand);
890 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
891 }
892 }
894 if (!Subtarget.hasP8Vector()) {
895 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
896 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
897 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
898 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
899 }
900
901 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
902 // with merges, splats, etc.
904
905 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
906 // are cheap, so handle them before they get expanded to scalar.
912
913 setOperationAction(ISD::AND , MVT::v4i32, Legal);
914 setOperationAction(ISD::OR , MVT::v4i32, Legal);
915 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
916 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
918 Subtarget.useCRBits() ? Legal : Expand);
919 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
929 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
932
933 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
934 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
935 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
936 if (Subtarget.hasAltivec())
937 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
939 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
940 if (Subtarget.hasP8Altivec())
941 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
942
943 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
944 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
945 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
946 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
947
948 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
949 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
950
951 if (Subtarget.hasVSX()) {
952 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
953 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
955 }
956
957 if (Subtarget.hasP8Altivec())
958 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
959 else
960 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
961
962 if (Subtarget.isISA3_1()) {
963 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
964 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
965 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
966 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
967 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
968 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
969 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
970 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
971 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
972 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
973 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
974 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
975 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
976 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
977 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
978 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
979 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
980 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
981 }
982
983 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
984 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
985
988
993
994 // Altivec does not contain unordered floating-point compare instructions
995 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
997 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
999
1000 if (Subtarget.hasVSX()) {
1003 if (Subtarget.hasP8Vector()) {
1006 }
1007 if (Subtarget.hasDirectMove() && isPPC64) {
1016 }
1018
1019 // The nearbyint variants are not allowed to raise the inexact exception
1020 // so we can only code-gen them with unsafe math.
1021 if (TM.Options.UnsafeFPMath) {
1024 }
1025
1026 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1027 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1028 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1030 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1031 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1034
1036 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1037 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1040
1041 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1042 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1043
1044 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1045 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1046
1047 // Share the Altivec comparison restrictions.
1048 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1049 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1050 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1051 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1052
1053 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1054 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1055
1057
1058 if (Subtarget.hasP8Vector())
1059 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1060
1061 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1062
1063 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1064 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1065 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1066
1067 if (Subtarget.hasP8Altivec()) {
1068 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1069 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1070 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1071
1072 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1073 // SRL, but not for SRA because of the instructions available:
1074 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1075 // doing
1076 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1077 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1078 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1079
1080 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1081 }
1082 else {
1083 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1084 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1085 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1086
1087 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1088
1089 // VSX v2i64 only supports non-arithmetic operations.
1090 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1091 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1092 }
1093
1094 if (Subtarget.isISA3_1())
1095 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1096 else
1097 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1098
1099 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1100 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1102 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1103
1105
1114
1115 // Custom handling for partial vectors of integers converted to
1116 // floating point. We already have optimal handling for v2i32 through
1117 // the DAG combine, so those aren't necessary.
1134
1135 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1136 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1137 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1138 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1141
1144
1145 // Handle constrained floating-point operations of vector.
1146 // The predicate is `hasVSX` because Altivec instructions do not raise
1147 // floating-point exceptions, but VSX vector instructions do.
1161
1175
1176 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1177 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1178
1179 for (MVT FPT : MVT::fp_valuetypes())
1180 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1181
1182 // Expand the SELECT to SELECT_CC
1184
1185 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1186 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1187
1188 // No implementation for these ops for PowerPC.
1190 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1191 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1192 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1194 setOperationAction(ISD::FREM, MVT::f128, Expand);
1195 }
1196
1197 if (Subtarget.hasP8Altivec()) {
1198 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1199 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1200 }
1201
1202 if (Subtarget.hasP9Vector()) {
1205
1206 // Test data class instructions store results in CR bits.
1207 if (Subtarget.useCRBits()) {
1211 }
1212
1213 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1214 // SRL, but not for SRA because of the instructions available:
1215 // VS{RL} and VS{RL}O.
1216 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1217 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1218 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1219
1220 setOperationAction(ISD::FADD, MVT::f128, Legal);
1221 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1222 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1223 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1225
1226 setOperationAction(ISD::FMA, MVT::f128, Legal);
1233
1235 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1237 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1240
1244
1245 // Handle constrained floating-point operations of fp128
1262 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1263 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1264 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1265 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1266 } else if (Subtarget.hasVSX()) {
1269
1270 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1271 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1272
1273 // Set FADD/FSUB as libcalls to keep the legalizer from expanding the
1274 // fp_to_uint and int_to_fp.
1277
1278 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1279 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1280 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1281 setOperationAction(ISD::FABS, MVT::f128, Expand);
1283 setOperationAction(ISD::FMA, MVT::f128, Expand);
1285
1286 // Expand the fp_extend if the target type is fp128.
1289
1290 // Expand the fp_round if the source type is fp128.
1291 for (MVT VT : {MVT::f32, MVT::f64}) {
1294 }
1295
1300
1301 // Lower following f128 select_cc pattern:
1302 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1304
1305 // We need to handle f128 SELECT_CC with integer result type.
1307 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1308 }
1309
1310 if (Subtarget.hasP9Altivec()) {
1311 if (Subtarget.isISA3_1()) {
1316 } else {
1319 }
1327
1328 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1329 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1330 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1331 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1332 }
1333
1334 if (Subtarget.hasP10Vector()) {
1336 }
1337 }
1338
1339 if (Subtarget.pairedVectorMemops()) {
1340 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1341 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1342 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1343 }
1344 if (Subtarget.hasMMA()) {
1345 if (Subtarget.isISAFuture())
1346 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1347 else
1348 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1349 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1350 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1352 }
1353
1354 if (Subtarget.has64BitSupport())
1356
1357 if (Subtarget.isISA3_1())
1358 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1359
1360 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1361
1362 if (!isPPC64) {
1365 }
1366
1371 }
1372
1374
1375 if (Subtarget.hasAltivec()) {
1376 // Altivec instructions set fields to all zeros or all ones.
1378 }
1379
1382 else if (isPPC64)
1384 else
1386
1387 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1388
1389 // We have target-specific dag combine patterns for the following nodes:
1392 if (Subtarget.hasFPCVT())
1395 if (Subtarget.useCRBits())
1399
1401
1403
1404 if (Subtarget.useCRBits()) {
1406 }
1407
1408 setLibcallName(RTLIB::LOG_F128, "logf128");
1409 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1410 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1411 setLibcallName(RTLIB::EXP_F128, "expf128");
1412 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1413 setLibcallName(RTLIB::SIN_F128, "sinf128");
1414 setLibcallName(RTLIB::COS_F128, "cosf128");
1415 setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1416 setLibcallName(RTLIB::POW_F128, "powf128");
1417 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1418 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1419 setLibcallName(RTLIB::REM_F128, "fmodf128");
1420 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1421 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1422 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1423 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1424 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1425 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1426 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1427 setLibcallName(RTLIB::RINT_F128, "rintf128");
1428 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1429 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1430 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1431 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1432 setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1433
1434 if (Subtarget.isAIXABI()) {
1435 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1436 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1437 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1438 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1439 }
1440
1441 // With 32 condition bits, we don't need to sink (and duplicate) compares
1442 // aggressively in CodeGenPrep.
1443 if (Subtarget.useCRBits()) {
1446 }
1447
1448 // TODO: The default entry number is set to 64. This stops most jump table
1449 // generation on PPC. But it is good for current PPC HWs because the indirect
1450 // branch instruction mtctr to the jump table may lead to bad branch prediction.
1451 // Re-evaluate this value on future HWs that can do better with mtctr.
1453
1455
1456 switch (Subtarget.getCPUDirective()) {
1457 default: break;
1458 case PPC::DIR_970:
1459 case PPC::DIR_A2:
1460 case PPC::DIR_E500:
1461 case PPC::DIR_E500mc:
1462 case PPC::DIR_E5500:
1463 case PPC::DIR_PWR4:
1464 case PPC::DIR_PWR5:
1465 case PPC::DIR_PWR5X:
1466 case PPC::DIR_PWR6:
1467 case PPC::DIR_PWR6X:
1468 case PPC::DIR_PWR7:
1469 case PPC::DIR_PWR8:
1470 case PPC::DIR_PWR9:
1471 case PPC::DIR_PWR10:
1472 case PPC::DIR_PWR11:
1476 break;
1477 }
1478
1479 if (Subtarget.enableMachineScheduler())
1481 else
1483
1485
1486 // The Freescale cores do better with aggressive inlining of memcpy and
1487 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1488 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1489 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1490 MaxStoresPerMemset = 32;
1492 MaxStoresPerMemcpy = 32;
1496 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1497 // The A2 also benefits from (very) aggressive inlining of memcpy and
1498 // friends. The overhead of the function call, even when warm, can be
1499 // over one hundred cycles.
1500 MaxStoresPerMemset = 128;
1501 MaxStoresPerMemcpy = 128;
1502 MaxStoresPerMemmove = 128;
1503 MaxLoadsPerMemcmp = 128;
1504 } else {
1507 }
1508
1509 IsStrictFPEnabled = true;
1510
1511 // Let the subtarget (CPU) decide if a predictable select is more expensive
1512 // than the corresponding branch. This information is used in CGP to decide
1513 // when to convert selects into branches.
1515
1517}
1518
1519// *********************************** NOTE ************************************
1520// For selecting load and store instructions, the addressing modes are defined
1521// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1522// patterns to match the load and store instructions.
1523//
1524// The TD definitions for the addressing modes correspond to their respective
1525// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1526// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1527// address mode flags of a particular node. Afterwards, the computed address
1528// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1529// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1530// accordingly, based on the preferred addressing mode.
1531//
1532// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1533// MemOpFlags contains all the possible flags that can be used to compute the
1534// optimal addressing mode for load and store instructions.
1535// AddrMode contains all the possible load and store addressing modes available
1536// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1537//
1538// When adding new load and store instructions, it is possible that new address
1539// flags may need to be added into MemOpFlags, and a new addressing mode will
1540// need to be added to AddrMode. An entry for the new addressing mode (consisting
1541// of the minimal and main distinguishing address flags for the new load/store
1542// instructions) will need to be added into initializeAddrModeMap() below.
1543// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1544// need to be updated to account for selecting the optimal addressing mode.
1545// *****************************************************************************
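// A sketch of the flow described above (illustrative only; the exact flag
// names are not spelled out here): for a simple load such as
//   lwz r3, 8(r4)
// computeMOFlags() records that the address is a register base plus a small
// signed immediate, getAddrModeForFlags() maps that flag set to
// PPC::AM_DForm (the mode listed for LWZ/STW below), and
// SelectOptimalAddrMode() then sets Base = r4 and Displacement = 8.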
1546/// Initialize the map that relates the different addressing modes of the load
1547/// and store instructions to a set of flags. This ensures the load/store
1548/// instruction is correctly matched during instruction selection.
1549void PPCTargetLowering::initializeAddrModeMap() {
1550 AddrModesMap[PPC::AM_DForm] = {
1551 // LWZ, STW
1556 // LBZ, LHZ, STB, STH
1561 // LHA
1566 // LFS, LFD, STFS, STFD
1571 };
1572 AddrModesMap[PPC::AM_DSForm] = {
1573 // LWA
1577 // LD, STD
1581 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1585 };
1586 AddrModesMap[PPC::AM_DQForm] = {
1587 // LXV, STXV
1591 };
1592 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1594 // TODO: Add mapping for quadword load/store.
1595}
1596
1597/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1598/// the desired ByVal argument alignment.
1599static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1600 if (MaxAlign == MaxMaxAlign)
1601 return;
1602 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1603 if (MaxMaxAlign >= 32 &&
1604 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1605 MaxAlign = Align(32);
1606 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1607 MaxAlign < 16)
1608 MaxAlign = Align(16);
1609 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1610 Align EltAlign;
1611 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1612 if (EltAlign > MaxAlign)
1613 MaxAlign = EltAlign;
1614 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1615 for (auto *EltTy : STy->elements()) {
1616 Align EltAlign;
1617 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1618 if (EltAlign > MaxAlign)
1619 MaxAlign = EltAlign;
1620 if (MaxAlign == MaxMaxAlign)
1621 break;
1622 }
1623 }
1624}
1625
1626/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1627/// function arguments in the caller parameter area.
1629 const DataLayout &DL) const {
1630 // 16-byte and wider vectors are passed on a 16-byte boundary.
1631 // The rest use an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1632 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1633 if (Subtarget.hasAltivec())
1634 getMaxByValAlign(Ty, Alignment, Align(16));
1635 return Alignment.value();
1636}
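// Illustrative example (assumes AltiVec vector syntax): for a by-value
// argument of type
//   struct S { int I; __vector int V; };
// getMaxByValAlign() sees the 128-bit vector member and raises the alignment
// to 16 bytes, while a struct of plain scalars keeps the default 8-byte
// (PPC64) or 4-byte (PPC32) alignment computed above.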
1637
1639 return Subtarget.useSoftFloat();
1640}
1641
1643 return Subtarget.hasSPE();
1644}
1645
1647 return VT.isScalarInteger();
1648}
1649
1651 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1652 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1653 return false;
1654
1655 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1656 if (VTy->getScalarType()->isIntegerTy()) {
1657 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1658 if (ElemSizeInBits == 32) {
1659 Index = Subtarget.isLittleEndian() ? 2 : 1;
1660 return true;
1661 }
1662 if (ElemSizeInBits == 64) {
1663 Index = Subtarget.isLittleEndian() ? 1 : 0;
1664 return true;
1665 }
1666 }
1667 }
1668 return false;
1669}
1670
1671const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1672 switch ((PPCISD::NodeType)Opcode) {
1673 case PPCISD::FIRST_NUMBER: break;
1674 case PPCISD::FSEL: return "PPCISD::FSEL";
1675 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1676 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1677 case PPCISD::FCFID: return "PPCISD::FCFID";
1678 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1679 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1680 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1681 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1682 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1683 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1684 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1685 case PPCISD::FRE: return "PPCISD::FRE";
1686 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1687 case PPCISD::FTSQRT:
1688 return "PPCISD::FTSQRT";
1689 case PPCISD::FSQRT:
1690 return "PPCISD::FSQRT";
1691 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1692 case PPCISD::VPERM: return "PPCISD::VPERM";
1693 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1695 return "PPCISD::XXSPLTI_SP_TO_DP";
1697 return "PPCISD::XXSPLTI32DX";
1698 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1699 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1700 case PPCISD::XXPERM:
1701 return "PPCISD::XXPERM";
1702 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1703 case PPCISD::CMPB: return "PPCISD::CMPB";
1704 case PPCISD::Hi: return "PPCISD::Hi";
1705 case PPCISD::Lo: return "PPCISD::Lo";
1706 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1707 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1708 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1709 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1710 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1711 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1712 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1713 case PPCISD::SRL: return "PPCISD::SRL";
1714 case PPCISD::SRA: return "PPCISD::SRA";
1715 case PPCISD::SHL: return "PPCISD::SHL";
1716 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1717 case PPCISD::CALL: return "PPCISD::CALL";
1718 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1719 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1720 case PPCISD::CALL_RM:
1721 return "PPCISD::CALL_RM";
1723 return "PPCISD::CALL_NOP_RM";
1725 return "PPCISD::CALL_NOTOC_RM";
1726 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1727 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1728 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1729 case PPCISD::BCTRL_RM:
1730 return "PPCISD::BCTRL_RM";
1732 return "PPCISD::BCTRL_LOAD_TOC_RM";
1733 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1734 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1735 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1736 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1737 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1738 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1739 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1740 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1741 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1742 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1744 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1746 return "PPCISD::ANDI_rec_1_EQ_BIT";
1748 return "PPCISD::ANDI_rec_1_GT_BIT";
1749 case PPCISD::VCMP: return "PPCISD::VCMP";
1750 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1751 case PPCISD::LBRX: return "PPCISD::LBRX";
1752 case PPCISD::STBRX: return "PPCISD::STBRX";
1753 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1754 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1755 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1756 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1757 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1758 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1759 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1760 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1761 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1763 return "PPCISD::ST_VSR_SCAL_INT";
1764 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1765 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1766 case PPCISD::BDZ: return "PPCISD::BDZ";
1767 case PPCISD::MFFS: return "PPCISD::MFFS";
1768 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1769 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1770 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1771 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1772 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1773 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1774 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1775 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1776 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1777 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1778 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1779 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1780 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1781 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1782 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1783 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1784 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1785 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1786 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1787 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1788 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1789 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1790 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1792 return "PPCISD::PADDI_DTPREL";
1793 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1794 case PPCISD::SC: return "PPCISD::SC";
1795 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1796 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1797 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1798 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1799 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1800 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1801 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1802 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1803 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1804 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1805 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1806 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1808 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1810 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1811 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1812 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1813 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1814 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1815 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1816 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1817 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1818 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1820 return "PPCISD::STRICT_FADDRTZ";
1822 return "PPCISD::STRICT_FCTIDZ";
1824 return "PPCISD::STRICT_FCTIWZ";
1826 return "PPCISD::STRICT_FCTIDUZ";
1828 return "PPCISD::STRICT_FCTIWUZ";
1830 return "PPCISD::STRICT_FCFID";
1832 return "PPCISD::STRICT_FCFIDU";
1834 return "PPCISD::STRICT_FCFIDS";
1836 return "PPCISD::STRICT_FCFIDUS";
1837 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1838 case PPCISD::STORE_COND:
1839 return "PPCISD::STORE_COND";
1840 }
1841 return nullptr;
1842}
1843
1845 EVT VT) const {
1846 if (!VT.isVector())
1847 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1848
1850}
1851
1853 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1854 return true;
1855}
1856
1857//===----------------------------------------------------------------------===//
1858// Node matching predicates, for use by the tblgen matching code.
1859//===----------------------------------------------------------------------===//
1860
1861/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1863 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1864 return CFP->getValueAPF().isZero();
1865 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1866 // Maybe this has already been legalized into the constant pool?
1867 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1868 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1869 return CFP->getValueAPF().isZero();
1870 }
1871 return false;
1872}
1873
1874/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1875/// true if Op is undef or if it matches the specified value.
1876static bool isConstantOrUndef(int Op, int Val) {
1877 return Op < 0 || Op == Val;
1878}
1879
1880/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1881/// VPKUHUM instruction.
1882/// The ShuffleKind distinguishes between big-endian operations with
1883/// two different inputs (0), either-endian operations with two identical
1884/// inputs (1), and little-endian operations with two different inputs (2).
1885/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1887 SelectionDAG &DAG) {
1888 bool IsLE = DAG.getDataLayout().isLittleEndian();
1889 if (ShuffleKind == 0) {
1890 if (IsLE)
1891 return false;
1892 for (unsigned i = 0; i != 16; ++i)
1893 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1894 return false;
1895 } else if (ShuffleKind == 2) {
1896 if (!IsLE)
1897 return false;
1898 for (unsigned i = 0; i != 16; ++i)
1899 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1900 return false;
1901 } else if (ShuffleKind == 1) {
1902 unsigned j = IsLE ? 0 : 1;
1903 for (unsigned i = 0; i != 8; ++i)
1904 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1905 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1906 return false;
1907 }
1908 return true;
1909}
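// Illustrative mask (derived from the checks above): on big endian with two
// different inputs (ShuffleKind 0), the expected v16i8 mask is
//   <1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31>
// i.e. the low-order byte of every halfword of both inputs, which is what
// vpkuhum produces.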
1910
1911/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1912/// VPKUWUM instruction.
1913/// The ShuffleKind distinguishes between big-endian operations with
1914/// two different inputs (0), either-endian operations with two identical
1915/// inputs (1), and little-endian operations with two different inputs (2).
1916/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1918 SelectionDAG &DAG) {
1919 bool IsLE = DAG.getDataLayout().isLittleEndian();
1920 if (ShuffleKind == 0) {
1921 if (IsLE)
1922 return false;
1923 for (unsigned i = 0; i != 16; i += 2)
1924 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1925 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1926 return false;
1927 } else if (ShuffleKind == 2) {
1928 if (!IsLE)
1929 return false;
1930 for (unsigned i = 0; i != 16; i += 2)
1931 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1932 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1933 return false;
1934 } else if (ShuffleKind == 1) {
1935 unsigned j = IsLE ? 0 : 2;
1936 for (unsigned i = 0; i != 8; i += 2)
1937 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1938 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1939 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1940 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1941 return false;
1942 }
1943 return true;
1944}
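// Illustrative mask (derived from the checks above): on big endian with two
// different inputs (ShuffleKind 0), the expected v16i8 mask is
//   <2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31>
// i.e. the low-order halfword of every word of both inputs.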
1945
1946/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1947/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1948/// current subtarget.
1949///
1950/// The ShuffleKind distinguishes between big-endian operations with
1951/// two different inputs (0), either-endian operations with two identical
1952/// inputs (1), and little-endian operations with two different inputs (2).
1953/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1955 SelectionDAG &DAG) {
1956 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1957 if (!Subtarget.hasP8Vector())
1958 return false;
1959
1960 bool IsLE = DAG.getDataLayout().isLittleEndian();
1961 if (ShuffleKind == 0) {
1962 if (IsLE)
1963 return false;
1964 for (unsigned i = 0; i != 16; i += 4)
1965 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1966 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1967 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1968 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1969 return false;
1970 } else if (ShuffleKind == 2) {
1971 if (!IsLE)
1972 return false;
1973 for (unsigned i = 0; i != 16; i += 4)
1974 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1975 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1976 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1977 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1978 return false;
1979 } else if (ShuffleKind == 1) {
1980 unsigned j = IsLE ? 0 : 4;
1981 for (unsigned i = 0; i != 8; i += 4)
1982 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1983 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1984 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1985 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1986 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1987 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1988 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1989 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1990 return false;
1991 }
1992 return true;
1993}
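// Illustrative mask (derived from the checks above): on big endian with two
// different inputs (ShuffleKind 0), the expected v16i8 mask is
//   <4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31>
// i.e. the low-order word of every doubleword of both inputs.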
1994
1995/// isVMerge - Common function, used to match vmrg* shuffles.
1996///
1997static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1998 unsigned LHSStart, unsigned RHSStart) {
1999 if (N->getValueType(0) != MVT::v16i8)
2000 return false;
2001 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2002 "Unsupported merge size!");
2003
2004 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2005 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2006 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2007 LHSStart+j+i*UnitSize) ||
2008 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2009 RHSStart+j+i*UnitSize))
2010 return false;
2011 }
2012 return true;
2013}
2014
2015/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2016/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2017/// The ShuffleKind distinguishes between big-endian merges with two
2018/// different inputs (0), either-endian merges with two identical inputs (1),
2019/// and little-endian merges with two different inputs (2). For the latter,
2020/// the input operands are swapped (see PPCInstrAltivec.td).
2022 unsigned ShuffleKind, SelectionDAG &DAG) {
2023 if (DAG.getDataLayout().isLittleEndian()) {
2024 if (ShuffleKind == 1) // unary
2025 return isVMerge(N, UnitSize, 0, 0);
2026 else if (ShuffleKind == 2) // swapped
2027 return isVMerge(N, UnitSize, 0, 16);
2028 else
2029 return false;
2030 } else {
2031 if (ShuffleKind == 1) // unary
2032 return isVMerge(N, UnitSize, 8, 8);
2033 else if (ShuffleKind == 0) // normal
2034 return isVMerge(N, UnitSize, 8, 24);
2035 else
2036 return false;
2037 }
2038}
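// Illustrative mask (derived from isVMerge above): for vmrglw on big endian
// with two different inputs (UnitSize 4, ShuffleKind 0), the expected mask is
//   <8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31>
// i.e. the low-order words of the two inputs, interleaved.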
2039
2040/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2041/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2042/// The ShuffleKind distinguishes between big-endian merges with two
2043/// different inputs (0), either-endian merges with two identical inputs (1),
2044/// and little-endian merges with two different inputs (2). For the latter,
2045/// the input operands are swapped (see PPCInstrAltivec.td).
2047 unsigned ShuffleKind, SelectionDAG &DAG) {
2048 if (DAG.getDataLayout().isLittleEndian()) {
2049 if (ShuffleKind == 1) // unary
2050 return isVMerge(N, UnitSize, 8, 8);
2051 else if (ShuffleKind == 2) // swapped
2052 return isVMerge(N, UnitSize, 8, 24);
2053 else
2054 return false;
2055 } else {
2056 if (ShuffleKind == 1) // unary
2057 return isVMerge(N, UnitSize, 0, 0);
2058 else if (ShuffleKind == 0) // normal
2059 return isVMerge(N, UnitSize, 0, 16);
2060 else
2061 return false;
2062 }
2063}
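// Illustrative mask (derived from isVMerge above): for vmrghw on big endian
// with two different inputs (UnitSize 4, ShuffleKind 0), the expected mask is
//   <0,1,2,3,16,17,18,19,4,5,6,7,20,21,22,23>
// i.e. the high-order words of the two inputs, interleaved.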
2064
2065/**
2066 * Common function used to match vmrgew and vmrgow shuffles
2067 *
2068 * The indexOffset determines whether to look for even or odd words in
2069 * the shuffle mask. This is based on the endianness of the target
2070 * machine.
2071 * - Little Endian:
2072 * - Use offset of 0 to check for odd elements
2073 * - Use offset of 4 to check for even elements
2074 * - Big Endian:
2075 * - Use offset of 0 to check for even elements
2076 * - Use offset of 4 to check for odd elements
2077 * A detailed description of the vector element ordering for little endian and
2078 * big endian can be found at
2079 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2080 * Targeting your applications - what little endian and big endian IBM XL C/C++
2081 * compiler differences mean to you
2082 *
2083 * The mask to the shuffle vector instruction specifies the indices of the
2084 * elements from the two input vectors to place in the result. The elements are
2085 * numbered in array-access order, starting with the first vector. These vectors
2086 * are always of type v16i8, thus each vector will contain 16 byte-sized
2087 * elements. More info on the shuffle vector can be found in the
2088 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2089 * Language Reference.
2090 *
2091 * The RHSStartValue indicates whether the same input vectors are used (unary)
2092 * or two different input vectors are used, based on the following:
2093 * - If the instruction uses the same vector for both inputs, the range of the
2094 * indices will be 0 to 15. In this case, the RHSStart value passed should
2095 * be 0.
2096 * - If the instruction has two different vectors then the range of the
2097 * indices will be 0 to 31. In this case, the RHSStart value passed should
2098 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2099 * to 31 specify elements in the second vector).
2100 *
2101 * \param[in] N The shuffle vector SD Node to analyze
2102 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2103 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2104 * vector to the shuffle_vector instruction
2105 * \return true iff this shuffle vector represents an even or odd word merge
2106 */
2107static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2108 unsigned RHSStartValue) {
2109 if (N->getValueType(0) != MVT::v16i8)
2110 return false;
2111
2112 for (unsigned i = 0; i < 2; ++i)
2113 for (unsigned j = 0; j < 4; ++j)
2114 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2115 i*RHSStartValue+j+IndexOffset) ||
2116 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2117 i*RHSStartValue+j+IndexOffset+8))
2118 return false;
2119 return true;
2120}
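// Illustrative mask (derived from the checks above): a big-endian vmrgew with
// two different inputs uses IndexOffset 0 and RHSStartValue 16, giving
//   <0,1,2,3,16,17,18,19,8,9,10,11,24,25,26,27>
// i.e. the even words (0 and 2) of the two inputs, interleaved.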
2121
2122/**
2123 * Determine if the specified shuffle mask is suitable for the vmrgew or
2124 * vmrgow instructions.
2125 *
2126 * \param[in] N The shuffle vector SD Node to analyze
2127 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2128 * \param[in] ShuffleKind Identify the type of merge:
2129 * - 0 = big-endian merge with two different inputs;
2130 * - 1 = either-endian merge with two identical inputs;
2131 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2132 * little-endian merges).
2133 * \param[in] DAG The current SelectionDAG
2134 * \return true iff this shuffle mask is suitable for vmrgew or vmrgow
2135 */
2137 unsigned ShuffleKind, SelectionDAG &DAG) {
2138 if (DAG.getDataLayout().isLittleEndian()) {
2139 unsigned indexOffset = CheckEven ? 4 : 0;
2140 if (ShuffleKind == 1) // Unary
2141 return isVMerge(N, indexOffset, 0);
2142 else if (ShuffleKind == 2) // swapped
2143 return isVMerge(N, indexOffset, 16);
2144 else
2145 return false;
2146 }
2147 else {
2148 unsigned indexOffset = CheckEven ? 0 : 4;
2149 if (ShuffleKind == 1) // Unary
2150 return isVMerge(N, indexOffset, 0);
2151 else if (ShuffleKind == 0) // Normal
2152 return isVMerge(N, indexOffset, 16);
2153 else
2154 return false;
2155 }
2156 return false;
2157}
2158
2159/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2160/// amount, otherwise return -1.
2161/// The ShuffleKind distinguishes between big-endian operations with two
2162/// different inputs (0), either-endian operations with two identical inputs
2163/// (1), and little-endian operations with two different inputs (2). For the
2164/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2165int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2166 SelectionDAG &DAG) {
2167 if (N->getValueType(0) != MVT::v16i8)
2168 return -1;
2169
2170 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2171
2172 // Find the first non-undef value in the shuffle mask.
2173 unsigned i;
2174 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2175 /*search*/;
2176
2177 if (i == 16) return -1; // all undef.
2178
2179 // Otherwise, check to see if the rest of the elements are consecutively
2180 // numbered from this value.
2181 unsigned ShiftAmt = SVOp->getMaskElt(i);
2182 if (ShiftAmt < i) return -1;
2183
2184 ShiftAmt -= i;
2185 bool isLE = DAG.getDataLayout().isLittleEndian();
2186
2187 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2188 // Check the rest of the elements to see if they are consecutive.
2189 for (++i; i != 16; ++i)
2190 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2191 return -1;
2192 } else if (ShuffleKind == 1) {
2193 // Check the rest of the elements to see if they are consecutive.
2194 for (++i; i != 16; ++i)
2195 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2196 return -1;
2197 } else
2198 return -1;
2199
2200 if (isLE)
2201 ShiftAmt = 16 - ShiftAmt;
2202
2203 return ShiftAmt;
2204}
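// Illustrative example (added for exposition, not part of the original
// source): for a big-endian, two-input shuffle (ShuffleKind == 0) with mask
//   <3,4,5,...,18>
// the first mask element is 3 and every following element is consecutive,
// so the function returns a vsldoi shift amount of 3. For the little-endian
// case (ShuffleKind == 2) the same mask would be reported as 16 - 3 = 13.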
2205
2206/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2207/// specifies a splat of a single element that is suitable for input to
2208/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2209bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2210 EVT VT = N->getValueType(0);
2211 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2212 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2213
2214 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2215 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2216
2217 // The consecutive indices need to specify an element, not part of two
2218 // different elements. So abandon ship early if this isn't the case.
2219 if (N->getMaskElt(0) % EltSize != 0)
2220 return false;
2221
2222 // This is a splat operation if each element of the permute is the same, and
2223 // if the value doesn't reference the second vector.
2224 unsigned ElementBase = N->getMaskElt(0);
2225
2226 // FIXME: Handle UNDEF elements too!
2227 if (ElementBase >= 16)
2228 return false;
2229
2230 // Check that the indices are consecutive, in the case of a multi-byte element
2231 // splatted with a v16i8 mask.
2232 for (unsigned i = 1; i != EltSize; ++i)
2233 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2234 return false;
2235
2236 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2237 if (N->getMaskElt(i) < 0) continue;
2238 for (unsigned j = 0; j != EltSize; ++j)
2239 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2240 return false;
2241 }
2242 return true;
2243}
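// Illustrative example (added for exposition, not part of the original
// source): with EltSize = 4, the v16i8 mask
//   <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7>
// passes the checks above: the leading index 4 is a multiple of the element
// size, the first four indices are consecutive, and every later group
// repeats the first, so this is a splat of word element 1.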
2244
2245/// Check that the mask is shuffling N byte elements. Within each N byte
2246/// element of the mask, the indices could be either in increasing or
2247/// decreasing order as long as they are consecutive.
2248/// \param[in] N the shuffle vector SD Node to analyze
2249/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2250/// Word/DoubleWord/QuadWord).
2251/// \param[in] StepLen the index delta between adjacent bytes within each N-byte
2252/// element: 1 if the mask is in increasing order, -1 if it is in decreasing order.
2253/// \return true iff the mask is shuffling N byte elements.
2254static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2255 int StepLen) {
2256 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2257 "Unexpected element width.");
2258 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2259
2260 unsigned NumOfElem = 16 / Width;
2261 unsigned MaskVal[16]; // Width is never greater than 16
2262 for (unsigned i = 0; i < NumOfElem; ++i) {
2263 MaskVal[0] = N->getMaskElt(i * Width);
2264 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2265 return false;
2266 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2267 return false;
2268 }
2269
2270 for (unsigned int j = 1; j < Width; ++j) {
2271 MaskVal[j] = N->getMaskElt(i * Width + j);
2272 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2273 return false;
2274 }
2275 }
2276 }
2277
2278 return true;
2279}
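// Illustrative example (added for exposition, not part of the original
// source): with Width = 4 and StepLen = 1 the mask
//   <8,9,10,11, 0,1,2,3, 20,21,22,23, 28,29,30,31>
// is accepted (each word starts on a multiple of 4 and its bytes increase
// by 1), whereas with StepLen = -1 a byte-reversing mask such as
//   <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12>
// is accepted instead (the leading index of each word is one below a
// multiple of 4 and its bytes decrease by 1).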
2280
2281bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2282 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2283 if (!isNByteElemShuffleMask(N, 4, 1))
2284 return false;
2285
2286 // Now we look at mask elements 0,4,8,12
2287 unsigned M0 = N->getMaskElt(0) / 4;
2288 unsigned M1 = N->getMaskElt(4) / 4;
2289 unsigned M2 = N->getMaskElt(8) / 4;
2290 unsigned M3 = N->getMaskElt(12) / 4;
2291 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2292 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2293
2294 // Below, let H and L be arbitrary elements of the shuffle mask
2295 // where H is in the range [4,7] and L is in the range [0,3].
2296 // H, 1, 2, 3 or L, 5, 6, 7
2297 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2298 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2299 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2300 InsertAtByte = IsLE ? 12 : 0;
2301 Swap = M0 < 4;
2302 return true;
2303 }
2304 // 0, H, 2, 3 or 4, L, 6, 7
2305 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2306 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2307 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2308 InsertAtByte = IsLE ? 8 : 4;
2309 Swap = M1 < 4;
2310 return true;
2311 }
2312 // 0, 1, H, 3 or 4, 5, L, 7
2313 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2314 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2315 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2316 InsertAtByte = IsLE ? 4 : 8;
2317 Swap = M2 < 4;
2318 return true;
2319 }
2320 // 0, 1, 2, H or 4, 5, 6, L
2321 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2322 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2323 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2324 InsertAtByte = IsLE ? 0 : 12;
2325 Swap = M3 < 4;
2326 return true;
2327 }
2328
2329 // If both vector operands for the shuffle are the same vector, the mask will
2330 // contain only elements from the first one and the second one will be undef.
2331 if (N->getOperand(1).isUndef()) {
2332 ShiftElts = 0;
2333 Swap = true;
2334 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2335 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2336 InsertAtByte = IsLE ? 12 : 0;
2337 return true;
2338 }
2339 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2340 InsertAtByte = IsLE ? 8 : 4;
2341 return true;
2342 }
2343 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2344 InsertAtByte = IsLE ? 4 : 8;
2345 return true;
2346 }
2347 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2348 InsertAtByte = IsLE ? 0 : 12;
2349 return true;
2350 }
2351 }
2352
2353 return false;
2354}
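// Illustrative example (added for exposition, not part of the original
// source): for the little-endian mask
//   <0,1,2,3, 4,5,6,7, 20,21,22,23, 12,13,14,15>
// the word-level mask is {0, 1, 5, 3}, which matches the "0, 1, H, 3" case
// above with M2 = 5. The helper then reports ShiftElts = 1, InsertAtByte = 4
// and Swap = false.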
2355
2356bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2357 bool &Swap, bool IsLE) {
2358 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2359 // Ensure each byte index of the word is consecutive.
2360 if (!isNByteElemShuffleMask(N, 4, 1))
2361 return false;
2362
2363 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2364 unsigned M0 = N->getMaskElt(0) / 4;
2365 unsigned M1 = N->getMaskElt(4) / 4;
2366 unsigned M2 = N->getMaskElt(8) / 4;
2367 unsigned M3 = N->getMaskElt(12) / 4;
2368
2369 // If both vector operands for the shuffle are the same vector, the mask will
2370 // contain only elements from the first one and the second one will be undef.
2371 if (N->getOperand(1).isUndef()) {
2372 assert(M0 < 4 && "Indexing into an undef vector?");
2373 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2374 return false;
2375
2376 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2377 Swap = false;
2378 return true;
2379 }
2380
2381 // Ensure each word index of the ShuffleVector Mask is consecutive.
2382 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2383 return false;
2384
2385 if (IsLE) {
2386 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2387 // Input vectors don't need to be swapped if the leading element
2388 // of the result is one of the 3 left elements of the second vector
2389 // (or if there is no shift to be done at all).
2390 Swap = false;
2391 ShiftElts = (8 - M0) % 8;
2392 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2393 // Input vectors need to be swapped if the leading element
2394 // of the result is one of the 3 left elements of the first vector
2395 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2396 Swap = true;
2397 ShiftElts = (4 - M0) % 4;
2398 }
2399
2400 return true;
2401 } else { // BE
2402 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2403 // Input vectors don't need to be swapped if the leading element
2404 // of the result is one of the 4 elements of the first vector.
2405 Swap = false;
2406 ShiftElts = M0;
2407 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2408 // Input vectors need to be swapped if the leading element
2409 // of the result is one of the 4 elements of the right vector.
2410 Swap = true;
2411 ShiftElts = M0 - 4;
2412 }
2413
2414 return true;
2415 }
2416}
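// Illustrative example (added for exposition, not part of the original
// source): for a big-endian two-input shuffle whose word-level mask is
// {1, 2, 3, 4}, the word indices are consecutive modulo 8 and the leading
// word comes from the first input, so the function sets Swap = false and
// ShiftElts = 1 (an xxsldwi by one word). The same mask on little-endian
// sets Swap = true and ShiftElts = (4 - 1) % 4 = 3.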
2417
2418static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2419 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2420
2421 if (!isNByteElemShuffleMask(N, Width, -1))
2422 return false;
2423
2424 for (int i = 0; i < 16; i += Width)
2425 if (N->getMaskElt(i) != i + Width - 1)
2426 return false;
2427
2428 return true;
2429}
2430
2431bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2432 return isXXBRShuffleMaskHelper(N, 2);
2433}
2434
2435bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2436 return isXXBRShuffleMaskHelper(N, 4);
2437}
2438
2439bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2440 return isXXBRShuffleMaskHelper(N, 8);
2441}
2442
2443bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2444 return isXXBRShuffleMaskHelper(N, 16);
2445}
2446
2447/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2448/// if the inputs to the instruction should be swapped and set \p DM to the
2449/// value for the immediate.
2450/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2451/// AND element 0 of the result comes from the first input (LE) or second input
2452/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2453/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2454/// mask.
2455bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2456 bool &Swap, bool IsLE) {
2457 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2458
2459 // Ensure each byte index of the double word is consecutive.
2460 if (!isNByteElemShuffleMask(N, 8, 1))
2461 return false;
2462
2463 unsigned M0 = N->getMaskElt(0) / 8;
2464 unsigned M1 = N->getMaskElt(8) / 8;
2465 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2466
2467 // If both vector operands for the shuffle are the same vector, the mask will
2468 // contain only elements from the first one and the second one will be undef.
2469 if (N->getOperand(1).isUndef()) {
2470 if ((M0 | M1) < 2) {
2471 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2472 Swap = false;
2473 return true;
2474 } else
2475 return false;
2476 }
2477
2478 if (IsLE) {
2479 if (M0 > 1 && M1 < 2) {
2480 Swap = false;
2481 } else if (M0 < 2 && M1 > 1) {
2482 M0 = (M0 + 2) % 4;
2483 M1 = (M1 + 2) % 4;
2484 Swap = true;
2485 } else
2486 return false;
2487
2488 // Note: if control flow comes here that means Swap is already set above
2489 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2490 return true;
2491 } else { // BE
2492 if (M0 < 2 && M1 > 1) {
2493 Swap = false;
2494 } else if (M0 > 1 && M1 < 2) {
2495 M0 = (M0 + 2) % 4;
2496 M1 = (M1 + 2) % 4;
2497 Swap = true;
2498 } else
2499 return false;
2500
2501 // Note: if control flow comes here that means Swap is already set above
2502 DM = (M0 << 1) + (M1 & 1);
2503 return true;
2504 }
2505}
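// Illustrative example (added for exposition, not part of the original
// source): for a big-endian two-input shuffle whose doubleword-level mask
// is {0, 3} (bytes <0..7, 24..31>), M0 = 0 and M1 = 3, so no swap is needed
// and the immediate is DM = (0 << 1) + (3 & 1) = 1, i.e. doubleword 0 of
// the first input followed by doubleword 1 of the second input.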
2506
2507
2508/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2509/// appropriate for PPC mnemonics (which have a big endian bias - namely
2510/// elements are counted from the left of the vector register).
2511unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2512 SelectionDAG &DAG) {
2513 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2514 assert(isSplatShuffleMask(SVOp, EltSize));
2515 EVT VT = SVOp->getValueType(0);
2516
2517 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2518 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2519 : SVOp->getMaskElt(0);
2520
2521 if (DAG.getDataLayout().isLittleEndian())
2522 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2523 else
2524 return SVOp->getMaskElt(0) / EltSize;
2525}
2526
2527/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2528/// by using a vspltis[bhw] instruction of the specified element size, return
2529/// the constant being splatted. The ByteSize field indicates the number of
2530/// bytes of each element [124] -> [bhw].
2531SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2532 SDValue OpVal;
2533
2534 // If ByteSize of the splat is bigger than the element size of the
2535 // build_vector, then we have a case where we are checking for a splat where
2536 // multiple elements of the buildvector are folded together into a single
2537 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2538 unsigned EltSize = 16/N->getNumOperands();
2539 if (EltSize < ByteSize) {
2540 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2541 SDValue UniquedVals[4];
2542 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2543
2544 // See if all of the elements in the buildvector agree across.
2545 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2546 if (N->getOperand(i).isUndef()) continue;
2547 // If the element isn't a constant, bail fully out.
2548 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2549
2550 if (!UniquedVals[i&(Multiple-1)].getNode())
2551 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2552 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2553 return SDValue(); // no match.
2554 }
2555
2556 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2557 // either constant or undef values that are identical for each chunk. See
2558 // if these chunks can form into a larger vspltis*.
2559
2560 // Check to see if all of the leading entries are either 0 or -1. If
2561 // neither, then this won't fit into the immediate field.
2562 bool LeadingZero = true;
2563 bool LeadingOnes = true;
2564 for (unsigned i = 0; i != Multiple-1; ++i) {
2565 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2566
2567 LeadingZero &= isNullConstant(UniquedVals[i]);
2568 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2569 }
2570 // Finally, check the least significant entry.
2571 if (LeadingZero) {
2572 if (!UniquedVals[Multiple-1].getNode())
2573 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2574 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2575 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2576 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2577 }
2578 if (LeadingOnes) {
2579 if (!UniquedVals[Multiple-1].getNode())
2580 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2581 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2582 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2583 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2584 }
2585
2586 return SDValue();
2587 }
2588
2589 // Check to see if this buildvec has a single non-undef value in its elements.
2590 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2591 if (N->getOperand(i).isUndef()) continue;
2592 if (!OpVal.getNode())
2593 OpVal = N->getOperand(i);
2594 else if (OpVal != N->getOperand(i))
2595 return SDValue();
2596 }
2597
2598 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2599
2600 unsigned ValSizeInBytes = EltSize;
2601 uint64_t Value = 0;
2602 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2603 Value = CN->getZExtValue();
2604 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2605 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2606 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2607 }
2608
2609 // If the splat value is larger than the element value, then we can never do
2610 // this splat. The only case that we could fit the replicated bits into our
2611 // immediate field for would be zero, and we prefer to use vxor for it.
2612 if (ValSizeInBytes < ByteSize) return SDValue();
2613
2614 // If the element value is larger than the splat value, check if it consists
2615 // of a repeated bit pattern of size ByteSize.
2616 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2617 return SDValue();
2618
2619 // Properly sign extend the value.
2620 int MaskVal = SignExtend32(Value, ByteSize * 8);
2621
2622 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2623 if (MaskVal == 0) return SDValue();
2624
2625 // Finally, if this value fits in a 5 bit sext field, return it
2626 if (SignExtend32<5>(MaskVal) == MaskVal)
2627 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2628 return SDValue();
2629}
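// Illustrative example (added for exposition, not part of the original
// source): asking for ByteSize == 2 (vspltish) on a v16i8 build_vector whose
// bytes repeat the pair {0, 4} takes the first path above: EltSize is 1,
// Multiple is 2, the leading byte of each pair is 0 and the trailing byte is
// 4, so the routine returns the target constant 4 and the vector can be
// materialized with vspltish 4.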
2630
2631//===----------------------------------------------------------------------===//
2632// Addressing Mode Selection
2633//===----------------------------------------------------------------------===//
2634
2635/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2636/// or 64-bit immediate, and if the value can be accurately represented as a
2637/// sign extension from a 16-bit value. If so, this returns true and the
2638/// immediate.
2639bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2640 if (!isa<ConstantSDNode>(N))
2641 return false;
2642
2643 Imm = (int16_t)N->getAsZExtVal();
2644 if (N->getValueType(0) == MVT::i32)
2645 return Imm == (int32_t)N->getAsZExtVal();
2646 else
2647 return Imm == (int64_t)N->getAsZExtVal();
2648}
2649bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2650 return isIntS16Immediate(Op.getNode(), Imm);
2651}
2652
2653/// Used when computing address flags for selecting loads and stores.
2654/// If we have an OR, check if the LHS and RHS are provably disjoint.
2655/// An OR of two provably disjoint values is equivalent to an ADD.
2656/// Most PPC load/store instructions compute the effective address as a sum,
2657/// so doing this conversion is useful.
2658static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2659 if (N.getOpcode() != ISD::OR)
2660 return false;
2661 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2662 if (!LHSKnown.Zero.getBoolValue())
2663 return false;
2664 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2665 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2666}
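// Illustrative example (added for exposition, not part of the original
// source): if the LHS is known to have its low 4 bits clear (say a value
// that was shifted left by 4) and the RHS is the constant 3, then every bit
// position is known zero in at least one operand, so the OR can never carry
// and may safely be treated as an ADD when forming a sum-style address.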
2667
2668/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2669/// be represented as an indexed [r+r] operation.
2670bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2671 SDValue &Index,
2672 SelectionDAG &DAG) const {
2673 for (SDNode *U : N->uses()) {
2674 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2675 if (Memop->getMemoryVT() == MVT::f64) {
2676 Base = N.getOperand(0);
2677 Index = N.getOperand(1);
2678 return true;
2679 }
2680 }
2681 }
2682 return false;
2683}
2684
2685/// isIntS34Immediate - This method tests whether the value of the given node can be
2686/// accurately represented as a sign extension from a 34-bit value. If so,
2687/// this returns true and the immediate.
2688bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2689 if (!isa<ConstantSDNode>(N))
2690 return false;
2691
2692 Imm = (int64_t)N->getAsZExtVal();
2693 return isInt<34>(Imm);
2694}
2695bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2696 return isIntS34Immediate(Op.getNode(), Imm);
2697}
2698
2699/// SelectAddressRegReg - Given the specified address, check to see if it
2700/// can be represented as an indexed [r+r] operation. Returns false if it
2701/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2702/// non-zero and N can be represented by a base register plus a signed 16-bit
2703/// displacement, make a more precise judgement by checking (displacement % \p
2704/// EncodingAlignment).
2705bool PPCTargetLowering::SelectAddressRegReg(
2706 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2707 MaybeAlign EncodingAlignment) const {
2708 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2709 // a [pc+imm].
2710 if (SelectAddressPCRel(N, Base))
2711 return false;
2712
2713 int16_t Imm = 0;
2714 if (N.getOpcode() == ISD::ADD) {
2715 // Is there any SPE load/store (f64), which can't handle a 16-bit offset?
2716 // SPE load/store can only handle 8-bit offsets.
2717 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2718 return true;
2719 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2720 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2721 return false; // r+i
2722 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2723 return false; // r+i
2724
2725 Base = N.getOperand(0);
2726 Index = N.getOperand(1);
2727 return true;
2728 } else if (N.getOpcode() == ISD::OR) {
2729 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2730 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2731 return false; // fold to r+i if we can.
2732
2733 // If this is an or of disjoint bitfields, we can codegen this as an add
2734 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2735 // disjoint.
2736 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2737
2738 if (LHSKnown.Zero.getBoolValue()) {
2739 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2740 // If all of the bits are known zero on the LHS or RHS, the add won't
2741 // carry.
2742 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2743 Base = N.getOperand(0);
2744 Index = N.getOperand(1);
2745 return true;
2746 }
2747 }
2748 }
2749
2750 return false;
2751}
2752
2753// If we happen to be doing an i64 load or store into a stack slot that has
2754// less than a 4-byte alignment, then the frame-index elimination may need to
2755// use an indexed load or store instruction (because the offset may not be a
2756// multiple of 4). The extra register needed to hold the offset comes from the
2757// register scavenger, and it is possible that the scavenger will need to use
2758// an emergency spill slot. As a result, we need to make sure that a spill slot
2759// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2760// stack slot.
2761static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2762 // FIXME: This does not handle the LWA case.
2763 if (VT != MVT::i64)
2764 return;
2765
2766 // NOTE: We'll exclude negative FIs here, which come from argument
2767 // lowering, because there are no known test cases triggering this problem
2768 // using packed structures (or similar). We can remove this exclusion if
2769 // we find such a test case. The reason why this is so test-case driven is
2770 // because this entire 'fixup' is only to prevent crashes (from the
2771 // register scavenger) on not-really-valid inputs. For example, if we have:
2772 // %a = alloca i1
2773 // %b = bitcast i1* %a to i64*
2774 // store i64 0, i64* %b
2775 // then the store should really be marked as 'align 1', but is not. If it
2776 // were marked as 'align 1' then the indexed form would have been
2777 // instruction-selected initially, and the problem this 'fixup' is preventing
2778 // won't happen regardless.
2779 if (FrameIdx < 0)
2780 return;
2781
2782 MachineFunction &MF = DAG.getMachineFunction();
2783 MachineFrameInfo &MFI = MF.getFrameInfo();
2784
2785 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2786 return;
2787
2788 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2789 FuncInfo->setHasNonRISpills();
2790}
2791
2792/// Returns true if the address N can be represented by a base register plus
2793/// a signed 16-bit displacement [r+imm], and if it is not better
2794/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2795/// displacements that are multiples of that value.
2796bool PPCTargetLowering::SelectAddressRegImm(
2797 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2798 MaybeAlign EncodingAlignment) const {
2799 // FIXME dl should come from parent load or store, not from address
2800 SDLoc dl(N);
2801
2802 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2803 // a [pc+imm].
2804 if (SelectAddressPCRel(N, Base))
2805 return false;
2806
2807 // If this can be more profitably realized as r+r, fail.
2808 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2809 return false;
2810
2811 if (N.getOpcode() == ISD::ADD) {
2812 int16_t imm = 0;
2813 if (isIntS16Immediate(N.getOperand(1), imm) &&
2814 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2815 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2816 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2817 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2818 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2819 } else {
2820 Base = N.getOperand(0);
2821 }
2822 return true; // [r+i]
2823 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2824 // Match LOAD (ADD (X, Lo(G))).
2825 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2826 "Cannot handle constant offsets yet!");
2827 Disp = N.getOperand(1).getOperand(0); // The global address.
2828 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2829 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2830 Disp.getOpcode() == ISD::TargetConstantPool ||
2831 Disp.getOpcode() == ISD::TargetJumpTable);
2832 Base = N.getOperand(0);
2833 return true; // [&g+r]
2834 }
2835 } else if (N.getOpcode() == ISD::OR) {
2836 int16_t imm = 0;
2837 if (isIntS16Immediate(N.getOperand(1), imm) &&
2838 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2839 // If this is an or of disjoint bitfields, we can codegen this as an add
2840 // (for better address arithmetic) if the LHS and RHS of the OR are
2841 // provably disjoint.
2842 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2843
2844 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2845 // If all of the bits are known zero on the LHS or RHS, the add won't
2846 // carry.
2847 if (FrameIndexSDNode *FI =
2848 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2849 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2850 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2851 } else {
2852 Base = N.getOperand(0);
2853 }
2854 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2855 return true;
2856 }
2857 }
2858 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2859 // Loading from a constant address.
2860
2861 // If this address fits entirely in a 16-bit sext immediate field, codegen
2862 // this as "d, 0"
2863 int16_t Imm;
2864 if (isIntS16Immediate(CN, Imm) &&
2865 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2866 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2867 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2868 CN->getValueType(0));
2869 return true;
2870 }
2871
2872 // Handle 32-bit sext immediates with LIS + addr mode.
2873 if ((CN->getValueType(0) == MVT::i32 ||
2874 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2875 (!EncodingAlignment ||
2876 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2877 int Addr = (int)CN->getZExtValue();
2878
2879 // Otherwise, break this down into an LIS + disp.
2880 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2881
2882 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2883 MVT::i32);
2884 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2885 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2886 return true;
2887 }
2888 }
2889
2890 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2891 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2892 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2893 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2894 } else
2895 Base = N;
2896 return true; // [r+0]
2897}
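// Illustrative examples (added for exposition, not part of the original
// source): an address of the form (add X, 20) is selected as Base = X,
// Disp = 20 and can feed a D-form access such as lwz r3, 20(rX); an address
// of the form (add X, PPCISD::Lo(&g, 0)) becomes Base = X, Disp = &g for the
// classic addis + D-form lo16 pattern; and a plain frame index falls through
// to the final case and is selected as [FI + 0].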
2898
2899/// Similar to the 16-bit case but for instructions that take a 34-bit
2900/// displacement field (prefixed loads/stores).
2901bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2902 SDValue &Base,
2903 SelectionDAG &DAG) const {
2904 // Only on 64-bit targets.
2905 if (N.getValueType() != MVT::i64)
2906 return false;
2907
2908 SDLoc dl(N);
2909 int64_t Imm = 0;
2910
2911 if (N.getOpcode() == ISD::ADD) {
2912 if (!isIntS34Immediate(N.getOperand(1), Imm))
2913 return false;
2914 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2915 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2916 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2917 else
2918 Base = N.getOperand(0);
2919 return true;
2920 }
2921
2922 if (N.getOpcode() == ISD::OR) {
2923 if (!isIntS34Immediate(N.getOperand(1), Imm))
2924 return false;
2925 // If this is an or of disjoint bitfields, we can codegen this as an add
2926 // (for better address arithmetic) if the LHS and RHS of the OR are
2927 // provably disjoint.
2928 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2929 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2930 return false;
2931 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2932 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2933 else
2934 Base = N.getOperand(0);
2935 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2936 return true;
2937 }
2938
2939 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2940 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2941 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2942 return true;
2943 }
2944
2945 return false;
2946}
2947
2948/// SelectAddressRegRegOnly - Given the specified address, force it to be
2949/// represented as an indexed [r+r] operation.
2950bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2951 SDValue &Index,
2952 SelectionDAG &DAG) const {
2953 // Check to see if we can easily represent this as an [r+r] address. This
2954 // will fail if it thinks that the address is more profitably represented as
2955 // reg+imm, e.g. where imm = 0.
2956 if (SelectAddressRegReg(N, Base, Index, DAG))
2957 return true;
2958
2959 // If the address is the result of an add, we will utilize the fact that the
2960 // address calculation includes an implicit add. However, we can reduce
2961 // register pressure if we do not materialize a constant just for use as the
2962 // index register. We only get rid of the add if it is not an add of a
2963 // value and a 16-bit signed constant and both have a single use.
2964 int16_t imm = 0;
2965 if (N.getOpcode() == ISD::ADD &&
2966 (!isIntS16Immediate(N.getOperand(1), imm) ||
2967 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2968 Base = N.getOperand(0);
2969 Index = N.getOperand(1);
2970 return true;
2971 }
2972
2973 // Otherwise, do it the hard way, using R0 as the base register.
2974 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2975 N.getValueType());
2976 Index = N;
2977 return true;
2978}
2979
2980template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2981 Ty *PCRelCand = dyn_cast<Ty>(N);
2982 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2983}
2984
2985/// Returns true if this address is a PC Relative address.
2986/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2987/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2988bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2989 // This is a materialize PC Relative node. Always select this as PC Relative.
2990 Base = N;
2991 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2992 return true;
2993 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2994 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2995 isValidPCRelNode<JumpTableSDNode>(N) ||
2996 isValidPCRelNode<BlockAddressSDNode>(N))
2997 return true;
2998 return false;
2999}
3000
3001/// Returns true if we should use a direct load into vector instruction
3002/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3003static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3004
3005 // If there are any other uses other than scalar to vector, then we should
3006 // keep it as a scalar load -> direct move pattern to prevent multiple
3007 // loads.
3008 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3009 if (!LD)
3010 return false;
3011
3012 EVT MemVT = LD->getMemoryVT();
3013 if (!MemVT.isSimple())
3014 return false;
3015 switch(MemVT.getSimpleVT().SimpleTy) {
3016 case MVT::i64:
3017 break;
3018 case MVT::i32:
3019 if (!ST.hasP8Vector())
3020 return false;
3021 break;
3022 case MVT::i16:
3023 case MVT::i8:
3024 if (!ST.hasP9Vector())
3025 return false;
3026 break;
3027 default:
3028 return false;
3029 }
3030
3031 SDValue LoadedVal(N, 0);
3032 if (!LoadedVal.hasOneUse())
3033 return false;
3034
3035 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
3036 UI != UE; ++UI)
3037 if (UI.getUse().get().getResNo() == 0 &&
3038 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3039 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3040 return false;
3041
3042 return true;
3043}
3044
3045/// getPreIndexedAddressParts - returns true by value, base pointer and
3046/// offset pointer and addressing mode by reference if the node's address
3047/// can be legally represented as pre-indexed load / store address.
3048bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3049 SDValue &Offset,
3050 ISD::MemIndexedMode &AM,
3051 SelectionDAG &DAG) const {
3052 if (DisablePPCPreinc) return false;
3053
3054 bool isLoad = true;
3055 SDValue Ptr;
3056 EVT VT;
3057 Align Alignment;
3058 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3059 Ptr = LD->getBasePtr();
3060 VT = LD->getMemoryVT();
3061 Alignment = LD->getAlign();
3062 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3063 Ptr = ST->getBasePtr();
3064 VT = ST->getMemoryVT();
3065 Alignment = ST->getAlign();
3066 isLoad = false;
3067 } else
3068 return false;
3069
3070 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3071 // instructions because we can fold these into a more efficient instruction
3072 // instead (such as LXSD).
3073 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3074 return false;
3075 }
3076
3077 // PowerPC doesn't have preinc load/store instructions for vectors
3078 if (VT.isVector())
3079 return false;
3080
3081 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3082 // Common code will reject creating a pre-inc form if the base pointer
3083 // is a frame index, or if N is a store and the base pointer is either
3084 // the same as or a predecessor of the value being stored. Check for
3085 // those situations here, and try with swapped Base/Offset instead.
3086 bool Swap = false;
3087
3088 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3089 Swap = true;
3090 else if (!isLoad) {
3091 SDValue Val = cast<StoreSDNode>(N)->getValue();
3092 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3093 Swap = true;
3094 }
3095
3096 if (Swap)
3097 std::swap(Base, Offset);
3098
3099 AM = ISD::PRE_INC;
3100 return true;
3101 }
3102
3103 // LDU/STU can only handle immediates that are a multiple of 4.
3104 if (VT != MVT::i64) {
3105 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3106 return false;
3107 } else {
3108 // LDU/STU need an address with at least 4-byte alignment.
3109 if (Alignment < Align(4))
3110 return false;
3111
3112 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3113 return false;
3114 }
3115
3116 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3117 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3118 // sext i32 to i64 when addr mode is r+i.
3119 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3120 LD->getExtensionType() == ISD::SEXTLOAD &&
3121 isa<ConstantSDNode>(Offset))
3122 return false;
3123 }
3124
3125 AM = ISD::PRE_INC;
3126 return true;
3127}
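// Illustrative example (added for exposition, not part of the original
// source): a 4-byte-aligned i64 store to (add r29, 32) can be selected as a
// pre-increment store, so a plain std becomes stdu rS, 32(r29), updating r29
// to r29 + 32 as a side effect. The checks above reject the same rewrite
// when the slot is under-aligned, and reject a pre-increment load of a
// sign-extended i32 into i64 under an r+i addressing mode, since there is
// no lwau instruction.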
3128
3129//===----------------------------------------------------------------------===//
3130// LowerOperation implementation
3131//===----------------------------------------------------------------------===//
3132
3133/// Set HiOpFlags and LoOpFlags to the target MO flags to use for a label or
3134/// global reference, selecting PIC flags when generating position-independent code.
3135static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3136 unsigned &HiOpFlags, unsigned &LoOpFlags,
3137 const GlobalValue *GV = nullptr) {
3138 HiOpFlags = PPCII::MO_HA;
3139 LoOpFlags = PPCII::MO_LO;
3140
3141 // Don't use the pic base if not in PIC relocation model.
3142 if (IsPIC) {
3143 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3144 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3145 }
3146}
3147
3148static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3149 SelectionDAG &DAG) {
3150 SDLoc DL(HiPart);
3151 EVT PtrVT = HiPart.getValueType();
3152 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3153
3154 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3155 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3156
3157 // With PIC, the first instruction is actually "GR+hi(&G)".
3158 if (isPIC)
3159 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3160 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3161
3162 // Generate non-pic code that has direct accesses to the constant pool.
3163 // The address of the global is just (hi(&g)+lo(&g)).
3164 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3165}
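// Illustrative sequence (added for exposition, not part of the original
// source): for a non-PIC 32-bit access to a global g, the Hi/Lo pair built
// above ultimately selects to something like
//   lis   r3, g@ha
//   addi  r3, r3, g@l
// while in PIC mode the high part is added to the GlobalBaseReg instead.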
3166
3167static void setUsesTOCBasePtr(MachineFunction &MF) {
3168 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3169 FuncInfo->setUsesTOCBasePtr();
3170}
3171
3171
3172static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3173 setUsesTOCBasePtr(DAG.getMachineFunction());
3174}
3175
3176SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3177 SDValue GA) const {
3178 const bool Is64Bit = Subtarget.isPPC64();
3179 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3180 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3181 : Subtarget.isAIXABI()
3182 ? DAG.getRegister(PPC::R2, VT)
3183 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3184 SDValue Ops[] = { GA, Reg };
3185 return DAG.getMemIntrinsicNode(
3186 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3187 MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3188 MachineMemOperand::MOLoad);
3189}
3190
3191SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3192 SelectionDAG &DAG) const {
3193 EVT PtrVT = Op.getValueType();
3194 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3195 const Constant *C = CP->getConstVal();
3196
3197 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3198 // The actual address of the GlobalValue is stored in the TOC.
3199 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3200 if (Subtarget.isUsingPCRelativeCalls()) {
3201 SDLoc DL(CP);
3202 EVT Ty = getPointerTy(DAG.getDataLayout());
3203 SDValue ConstPool = DAG.getTargetConstantPool(
3204 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3205 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3206 }
3207 setUsesTOCBasePtr(DAG);
3208 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3209 return getTOCEntry(DAG, SDLoc(CP), GA);
3210 }
3211
3212 unsigned MOHiFlag, MOLoFlag;
3213 bool IsPIC = isPositionIndependent();
3214 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3215
3216 if (IsPIC && Subtarget.isSVR4ABI()) {
3217 SDValue GA =
3218 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3219 return getTOCEntry(DAG, SDLoc(CP), GA);
3220 }
3221
3222 SDValue CPIHi =
3223 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3224 SDValue CPILo =
3225 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3226 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3227}
3228
3229// For 64-bit PowerPC, prefer the more compact relative encodings.
3230// This trades 32 bits per jump table entry for one or two instructions
3231// on the jump site.
3232unsigned PPCTargetLowering::getJumpTableEncoding() const {
3233 if (isJumpTableRelative())
3234 return MachineJumpTableInfo::EK_LabelDifference32;
3235
3236 return TargetLowering::getJumpTableEncoding();
3237}
3238
3239bool PPCTargetLowering::isJumpTableRelative() const {
3240 if (UseAbsoluteJumpTables)
3241 return false;
3242 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3243 return true;
3244 return TargetLowering::isJumpTableRelative();
3245}
3246
3247SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3248 SelectionDAG &DAG) const {
3249 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3250 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3251
3252 switch (getTargetMachine().getCodeModel()) {
3253 case CodeModel::Small:
3254 case CodeModel::Medium:
3255 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3256 default:
3257 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3258 getPointerTy(DAG.getDataLayout()));
3259 }
3260}
3261
3262const MCExpr *
3263PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3264 unsigned JTI,
3265 MCContext &Ctx) const {
3266 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3267 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3268
3269 switch (getTargetMachine().getCodeModel()) {
3270 case CodeModel::Small:
3271 case CodeModel::Medium:
3272 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3273 default:
3274 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3275 }
3276}
3277
3278SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3279 EVT PtrVT = Op.getValueType();
3280 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3281
3282 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3283 if (Subtarget.isUsingPCRelativeCalls()) {
3284 SDLoc DL(JT);
3285 EVT Ty = getPointerTy(DAG.getDataLayout());
3286 SDValue GA =
3287 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3288 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3289 return MatAddr;
3290 }
3291
3292 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3293 // The actual address of the GlobalValue is stored in the TOC.
3294 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3295 setUsesTOCBasePtr(DAG);
3296 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3297 return getTOCEntry(DAG, SDLoc(JT), GA);
3298 }
3299
3300 unsigned MOHiFlag, MOLoFlag;
3301 bool IsPIC = isPositionIndependent();
3302 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3303
3304 if (IsPIC && Subtarget.isSVR4ABI()) {
3305 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3306 PPCII::MO_PIC_FLAG);
3307 return getTOCEntry(DAG, SDLoc(GA), GA);
3308 }
3309
3310 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3311 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3312 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3313}
3314
3315SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3316 SelectionDAG &DAG) const {
3317 EVT PtrVT = Op.getValueType();
3318 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3319 const BlockAddress *BA = BASDN->getBlockAddress();
3320
3321 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3322 if (Subtarget.isUsingPCRelativeCalls()) {
3323 SDLoc DL(BASDN);
3324 EVT Ty = getPointerTy(DAG.getDataLayout());
3325 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3326 PPCII::MO_PCREL_FLAG);
3327 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3328 return MatAddr;
3329 }
3330
3331 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3332 // The actual BlockAddress is stored in the TOC.
3333 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3334 setUsesTOCBasePtr(DAG);
3335 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3336 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3337 }
3338
3339 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3340 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3341 return getTOCEntry(
3342 DAG, SDLoc(BASDN),
3343 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3344
3345 unsigned MOHiFlag, MOLoFlag;
3346 bool IsPIC = isPositionIndependent();
3347 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3348 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3349 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3350 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3351}
3352
3353SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3354 SelectionDAG &DAG) const {
3355 if (Subtarget.isAIXABI())
3356 return LowerGlobalTLSAddressAIX(Op, DAG);
3357
3358 return LowerGlobalTLSAddressLinux(Op, DAG);
3359}
3360
3361/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3362/// and then apply the update.
3363static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3364 SelectionDAG &DAG,
3365 const TargetMachine &TM) {
3366 // Initialize TLS model opt setting lazily:
3367 // (1) Use initial-exec for single TLS var references within current function.
3368 // (2) Use local-dynamic for multiple TLS var references within current
3369 // function.
3370 PPCFunctionInfo *FuncInfo =
3371 DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3372 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3373 SmallPtrSet<const GlobalValue *, 8> TLSGV;
3374 // Iterate over all instructions within current function, collect all TLS
3375 // global variables (global variables taken as the first parameter to
3376 // Intrinsic::threadlocal_address).
3377 const Function &Func = DAG.getMachineFunction().getFunction();
3378 for (Function::const_iterator BI = Func.begin(), BE = Func.end(); BI != BE;
3379 ++BI)
3380 for (BasicBlock::const_iterator II = BI->begin(), IE = BI->end();
3381 II != IE; ++II)
3382 if (II->getOpcode() == Instruction::Call)
3383 if (const CallInst *CI = dyn_cast<const CallInst>(&*II))
3384 if (Function *CF = CI->getCalledFunction())
3385 if (CF->isDeclaration() &&
3386 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3387 if (const GlobalValue *GV =
3388 dyn_cast<GlobalValue>(II->getOperand(0))) {
3389 TLSModel::Model GVModel = TM.getTLSModel(GV);
3390 if (GVModel == TLSModel::LocalDynamic)
3391 TLSGV.insert(GV);
3392 }
3393
3394 unsigned TLSGVCnt = TLSGV.size();
3395 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3396 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3397 FuncInfo->setAIXFuncUseTLSIEForLD();
3398 FuncInfo->setAIXFuncTLSModelOptInitDone();
3399 }
3400
3401 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3402 LLVM_DEBUG(
3403 dbgs() << DAG.getMachineFunction().getName()
3404 << " function is using the TLS-IE model for TLS-LD access.\n");
3405 Model = TLSModel::InitialExec;
3406 }
3407}
3408
3409SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3410 SelectionDAG &DAG) const {
3411 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3412
3413 if (DAG.getTarget().useEmulatedTLS())
3414 report_fatal_error("Emulated TLS is not yet supported on AIX");
3415
3416 SDLoc dl(GA);
3417 const GlobalValue *GV = GA->getGlobal();
3418 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3419 bool Is64Bit = Subtarget.isPPC64();
3420 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3421
3422 // Apply update to the TLS model.
3423 if (Subtarget.hasAIXShLibTLSModelOpt())
3424 updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3425
3426 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3427
3428 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3429 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3430 bool HasAIXSmallTLSGlobalAttr = false;
3431 SDValue VariableOffsetTGA =
3432 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3433 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3434 SDValue TLSReg;
3435
3436 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3437 if (GVar->hasAttribute("aix-small-tls"))
3438 HasAIXSmallTLSGlobalAttr = true;
3439
3440 if (Is64Bit) {
3441 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3442 // involves a load of the variable offset (from the TOC), followed by an
3443 // add of the loaded variable offset to R13 (the thread pointer).
3444 // This code sequence looks like:
3445 // ld reg1,var[TC](2)
3446 // add reg2, reg1, r13 // r13 contains the thread pointer
3447 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3448
3449 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3450 // global variable attribute, produce a faster access sequence for
3451 // local-exec TLS variables where the offset from the TLS base is encoded
3452 // as an immediate operand.
3453 //
3454 // We only utilize the faster local-exec access sequence when the TLS
3455 // variable has a size within the policy limit. We treat types that are
3456 // not sized or are empty as being over the policy size limit.
3457 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3458 IsTLSLocalExecModel) {
3459 Type *GVType = GV->getValueType();
3460 if (GVType->isSized() && !GVType->isEmptyTy() &&
3461 GV->getDataLayout().getTypeAllocSize(GVType) <=
3462 AIXSmallTlsPolicySizeLimit)
3463 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3464 }
3465 } else {
3466 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3467 // involves loading the variable offset from the TOC, generating a call to
3468 // .__get_tpointer to get the thread pointer (which will be in R3), and
3469 // adding the two together:
3470 // lwz reg1,var[TC](2)
3471 // bla .__get_tpointer
3472 // add reg2, reg1, r3
3473 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3474
3475 // We do not implement the 32-bit version of the faster access sequence
3476 // for local-exec that is controlled by the -maix-small-local-exec-tls
3477 // option, or the "aix-small-tls" global variable attribute.
3478 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3479 report_fatal_error("The small-local-exec TLS access sequence is "
3480 "currently only supported on AIX (64-bit mode).");
3481 }
3482 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3483 }
3484
3485 if (Model == TLSModel::LocalDynamic) {
3486 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3487
3488 // We do not implement the 32-bit version of the faster access sequence
3489 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3490 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3491 report_fatal_error("The small-local-dynamic TLS access sequence is "
3492 "currently only supported on AIX (64-bit mode).");
3493
3494 // For local-dynamic on AIX, we need to generate one TOC entry for each
3495 // variable offset, and a single module-handle TOC entry for the entire
3496 // file.
3497
3498 SDValue VariableOffsetTGA =
3499 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3500 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3501
3502 Module *M = DAG.getMachineFunction().getFunction().getParent();
3503 GlobalVariable *TLSGV =
3504 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3505 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3506 TLSGV->setThreadLocal(true);
3507 assert(TLSGV && "Not able to create GV for _$TLSML.");
3508 SDValue ModuleHandleTGA =
3509 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3510 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3511 SDValue ModuleHandle =
3512 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3513
3514 // With the -maix-small-local-dynamic-tls option, produce a faster access
3515 // sequence for local-dynamic TLS variables where the offset from the
3516 // module-handle is encoded as an immediate operand.
3517 //
3518 // We only utilize the faster local-dynamic access sequence when the TLS
3519 // variable has a size within the policy limit. We treat types that are
3520 // not sized or are empty as being over the policy size limit.
3521 if (HasAIXSmallLocalDynamicTLS) {
3522 Type *GVType = GV->getValueType();
3523 if (GVType->isSized() && !GVType->isEmptyTy() &&
3524 GV->getDataLayout().getTypeAllocSize(GVType) <=
3525 AIXSmallTlsPolicySizeLimit)
3526 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3527 ModuleHandle);
3528 }
3529
3530 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3531 }
3532
3533 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3534 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3535 // need to generate two TOC entries, one for the variable offset, one for the
3536 // region handle. The global address for the TOC entry of the region handle is
3537 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3538 // entry of the variable offset is created with MO_TLSGD_FLAG.
3539 SDValue VariableOffsetTGA =
3540 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3541 SDValue RegionHandleTGA =
3542 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3543 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3544 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3545 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3546 RegionHandle);
3547}
3548
3549SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3550 SelectionDAG &DAG) const {
3551 // FIXME: TLS addresses currently use medium model code sequences,
3552 // which is the most useful form. Eventually support for small and
3553 // large models could be added if users need it, at the cost of
3554 // additional complexity.
3555 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3556 if (DAG.getTarget().useEmulatedTLS())
3557 return LowerToTLSEmulatedModel(GA, DAG);
3558
3559 SDLoc dl(GA);
3560 const GlobalValue *GV = GA->getGlobal();
3561 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3562 bool is64bit = Subtarget.isPPC64();
3563 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3564 PICLevel::Level picLevel = M->getPICLevel();
3565
3567 TLSModel::Model Model = TM.getTLSModel(GV);
3568
3569 if (Model == TLSModel::LocalExec) {
3570 if (Subtarget.isUsingPCRelativeCalls()) {
3571 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3572 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3573 PPCII::MO_TPREL_PCREL_FLAG);
3574 SDValue MatAddr =
3575 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3576 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3577 }
3578
3579 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3580 PPCII::MO_TPREL_HA);
3581 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3582 PPCII::MO_TPREL_LO);
3583 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3584 : DAG.getRegister(PPC::R2, MVT::i32);
3585
3586 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3587 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3588 }
3589
3590 if (Model == TLSModel::InitialExec) {
3591 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3592 SDValue TGA = DAG.getTargetGlobalAddress(
3593 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3594 SDValue TGATLS = DAG.getTargetGlobalAddress(
3595 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3596 SDValue TPOffset;
3597 if (IsPCRel) {
3598 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3599 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3600 MachinePointerInfo());
3601 } else {
3602 SDValue GOTPtr;
3603 if (is64bit) {
3604 setUsesTOCBasePtr(DAG);
3605 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3606 GOTPtr =
3607 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3608 } else {
3609 if (!TM.isPositionIndependent())
3610 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3611 else if (picLevel == PICLevel::SmallPIC)
3612 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3613 else
3614 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3615 }
3616 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3617 }
3618 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3619 }
3620
3621 if (Model == TLSModel::GeneralDynamic) {
3622 if (Subtarget.isUsingPCRelativeCalls()) {
3623 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3624 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3625 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3626 }
3627
3628 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3629 SDValue GOTPtr;
3630 if (is64bit) {
3631 setUsesTOCBasePtr(DAG);
3632 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3633 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3634 GOTReg, TGA);
3635 } else {
3636 if (picLevel == PICLevel::SmallPIC)
3637 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3638 else
3639 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3640 }
3641 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3642 GOTPtr, TGA, TGA);
3643 }
3644
3645 if (Model == TLSModel::LocalDynamic) {
3646 if (Subtarget.isUsingPCRelativeCalls()) {
3647 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3648 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3649 SDValue MatPCRel =
3650 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3651 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3652 }
3653
3654 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3655 SDValue GOTPtr;
3656 if (is64bit) {
3657 setUsesTOCBasePtr(DAG);
3658 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3659 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3660 GOTReg, TGA);
3661 } else {
3662 if (picLevel == PICLevel::SmallPIC)
3663 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3664 else
3665 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3666 }
3667 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3668 PtrVT, GOTPtr, TGA, TGA);
3669 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3670 PtrVT, TLSAddr, TGA);
3671 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3672 }
3673
3674 llvm_unreachable("Unknown TLS model!");
3675}
3676
3677SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3678 SelectionDAG &DAG) const {
3679 EVT PtrVT = Op.getValueType();
3680 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3681 SDLoc DL(GSDN);
3682 const GlobalValue *GV = GSDN->getGlobal();
3683
3684 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3685 // The actual address of the GlobalValue is stored in the TOC.
3686 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3687 if (Subtarget.isUsingPCRelativeCalls()) {
3688 EVT Ty = getPointerTy(DAG.getDataLayout());
3689 if (isAccessedAsGotIndirect(Op)) {
3690 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3691 PPCII::MO_GOT_PCREL_FLAG);
3692 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3693 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3694 MachinePointerInfo());
3695 return Load;
3696 } else {
3697 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3698 PPCII::MO_PCREL_FLAG);
3699 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3700 }
3701 }
3702 setUsesTOCBasePtr(DAG);
3703 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3704 return getTOCEntry(DAG, DL, GA);
3705 }
3706
3707 unsigned MOHiFlag, MOLoFlag;
3708 bool IsPIC = isPositionIndependent();
3709 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3710
3711 if (IsPIC && Subtarget.isSVR4ABI()) {
3712 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3713 GSDN->getOffset(),
3714 PPCII::MO_PIC_FLAG);
3715 return getTOCEntry(DAG, DL, GA);
3716 }
3717
3718 SDValue GAHi =
3719 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3720 SDValue GALo =
3721 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3722
3723 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3724}
3725
3726SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3727 bool IsStrict = Op->isStrictFPOpcode();
3728 ISD::CondCode CC =
3729 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3730 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3731 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3732 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3733 EVT LHSVT = LHS.getValueType();
3734 SDLoc dl(Op);
3735
3736 // Soften the setcc with libcall if it is fp128.
3737 if (LHSVT == MVT::f128) {
3738 assert(!Subtarget.hasP9Vector() &&
3739 "SETCC for f128 is already legal under Power9!");
3740 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3741 Op->getOpcode() == ISD::STRICT_FSETCCS);
3742 if (RHS.getNode())
3743 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3744 DAG.getCondCode(CC));
3745 if (IsStrict)
3746 return DAG.getMergeValues({LHS, Chain}, dl);
3747 return LHS;
3748 }
3749
3750 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3751
3752 if (Op.getValueType() == MVT::v2i64) {
3753 // When the operands themselves are v2i64 values, we need to do something
3754 // special because VSX has no underlying comparison operations for these.
3755 if (LHS.getValueType() == MVT::v2i64) {
3756 // Equality can be handled by casting to the legal type for Altivec
3757 // comparisons, everything else needs to be expanded.
3758 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3759 return SDValue();
3760 SDValue SetCC32 = DAG.getSetCC(
3761 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3762 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
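// Each 64-bit lane compares equal only if both of its 32-bit halves do, and
// unequal if either half differs; so swap the 32-bit results pairwise and
// combine them with the matching operation (AND for SETEQ, OR for SETNE).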
3763 int ShuffV[] = {1, 0, 3, 2};
3764 SDValue Shuff =
3765 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3766 return DAG.getBitcast(MVT::v2i64,
3767 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3768 dl, MVT::v4i32, Shuff, SetCC32));
3769 }
3770
3771 // We handle most of these in the usual way.
3772 return Op;
3773 }
3774
3775 // If we're comparing for equality to zero, expose the fact that this is
3776 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3777 // fold the new nodes.
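// For example, (seteq X, 0) for i32 becomes (srl (ctlz X), 5): ctlz yields 32
// only when X is zero, so the shifted result is 1 exactly when X == 0.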
3778 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3779 return V;
3780
3781 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3782 // Leave comparisons against 0 and -1 alone for now, since they're usually
3783 // optimized. FIXME: revisit this when we can custom lower all setcc
3784 // optimizations.
3785 if (C->isAllOnes() || C->isZero())
3786 return SDValue();
3787 }
3788
3789 // If we have an integer seteq/setne, turn it into a compare against zero
3790 // by xor'ing the rhs with the lhs, which is faster than setting a
3791 // condition register, reading it back out, and masking the correct bit. The
3792 // normal approach here uses sub to do this instead of xor. Using xor exposes
3793 // the result to other bit-twiddling opportunities.
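// For example, (seteq A, B) becomes (seteq (xor A, B), 0).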
3794 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3795 EVT VT = Op.getValueType();
3796 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3797 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3798 }
3799 return SDValue();
3800}
3801
3802SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3803 SDNode *Node = Op.getNode();
3804 EVT VT = Node->getValueType(0);
3805 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3806 SDValue InChain = Node->getOperand(0);
3807 SDValue VAListPtr = Node->getOperand(1);
3808 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3809 SDLoc dl(Node);
3810
3811 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3812
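// The va_list byte layout used below: byte 0 is the gpr index, byte 1 the fpr
// index, bytes 4-7 the overflow_arg_area pointer and bytes 8-11 the
// reg_save_area pointer (see the struct sketch in LowerVASTART below).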
3813 // gpr_index
3814 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3815 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3816 InChain = GprIndex.getValue(1);
3817
3818 if (VT == MVT::i64) {
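// i64 values live in aligned GPR pairs (r3:r4, r5:r6, ...), so an odd gpr
// index must be rounded up to the next even one before use.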
3819 // Check if GprIndex is even
3820 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3821 DAG.getConstant(1, dl, MVT::i32));
3822 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3823 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3824 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3825 DAG.getConstant(1, dl, MVT::i32));
3826 // Align GprIndex to be even if it isn't
3827 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3828 GprIndex);
3829 }
3830
3831 // fpr index is 1 byte after gpr
3832 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3833 DAG.getConstant(1, dl, MVT::i32));
3834
3835 // fpr
3836 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3837 FprPtr, MachinePointerInfo(SV), MVT::i8);
3838 InChain = FprIndex.getValue(1);
3839
3840 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3841 DAG.getConstant(8, dl, MVT::i32));
3842
3843 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3844 DAG.getConstant(4, dl, MVT::i32));
3845
3846 // areas
3847 SDValue OverflowArea =
3848 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3849 InChain = OverflowArea.getValue(1);
3850
3851 SDValue RegSaveArea =
3852 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3853 InChain = RegSaveArea.getValue(1);
3854
3855 // select overflow_area if index >= 8
3856 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3857 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3858
3859 // adjustment constant gpr_index * 4/8
3860 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3861 VT.isInteger() ? GprIndex : FprIndex,
3862 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3863 MVT::i32));
3864
3865 // OurReg = RegSaveArea + RegConstant
3866 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3867 RegConstant);
3868
3869 // Floating types are 32 bytes into RegSaveArea
3870 if (VT.isFloatingPoint())
3871 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3872 DAG.getConstant(32, dl, MVT::i32));
3873
3874 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3875 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3876 VT.isInteger() ? GprIndex : FprIndex,
3877 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3878 MVT::i32));
3879
3880 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3881 VT.isInteger() ? VAListPtr : FprPtr,
3882 MachinePointerInfo(SV), MVT::i8);
3883
3884 // determine if we should load from reg_save_area or overflow_area
3885 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3886
3887 // increase overflow_area by 4/8 if gpr/fpr >= 8
3888 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3889 DAG.getConstant(VT.isInteger() ? 4 : 8,
3890 dl, MVT::i32));
3891
3892 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3893 OverflowAreaPlusN);
3894
3895 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3896 MachinePointerInfo(), MVT::i32);
3897
3898 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3899}
3900
3901SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3902 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3903
3904 // We have to copy the entire va_list struct:
3905 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3906 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3907 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3908 false, true, /*CI=*/nullptr, std::nullopt,
3909 MachinePointerInfo(), MachinePointerInfo());
3910}
3911
3912SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3913 SelectionDAG &DAG) const {
3914 if (Subtarget.isAIXABI())
3915 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3916
3917 return Op.getOperand(0);
3918}
3919
3920 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3921 MachineFunction &MF = DAG.getMachineFunction();
3922 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3923 
3924 assert((Op.getOpcode() == ISD::INLINEASM ||
3925 Op.getOpcode() == ISD::INLINEASM_BR) &&
3926 "Expecting Inline ASM node.");
3927
3928 // If an LR store is already known to be required then there is no point in
3929 // checking this ASM as well.
3930 if (MFI.isLRStoreRequired())
3931 return Op;
3932
3933 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3934 // type MVT::Glue. We want to ignore this last operand if that is the case.
3935 unsigned NumOps = Op.getNumOperands();
3936 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3937 --NumOps;
3938
3939 // Check all operands that may contain the LR.
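// Only register defs and clobbers are inspected for LR below; register uses,
// immediate and memory operands are simply skipped over.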
3940 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3941 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3942 unsigned NumVals = Flags.getNumOperandRegisters();
3943 ++i; // Skip the ID value.
3944
3945 switch (Flags.getKind()) {
3946 default:
3947 llvm_unreachable("Bad flags!");
3948 case InlineAsm::Kind::RegUse:
3949 case InlineAsm::Kind::Imm:
3950 case InlineAsm::Kind::Mem:
3951 i += NumVals;
3952 break;
3953 case InlineAsm::Kind::Clobber:
3954 case InlineAsm::Kind::RegDef:
3955 case InlineAsm::Kind::RegDefEarlyClobber: {
3956 for (; NumVals; --NumVals, ++i) {
3957 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3958 if (Reg != PPC::LR && Reg != PPC::LR8)
3959 continue;
3960 MFI.setLRStoreRequired();
3961 return Op;
3962 }
3963 break;
3964 }
3965 }
3966 }
3967
3968 return Op;
3969}
3970
3971SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3972 SelectionDAG &DAG) const {
3973 if (Subtarget.isAIXABI())
3974 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3975
3976 SDValue Chain = Op.getOperand(0);
3977 SDValue Trmp = Op.getOperand(1); // trampoline
3978 SDValue FPtr = Op.getOperand(2); // nested function
3979 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3980 SDLoc dl(Op);
3981
3982 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3983 bool isPPC64 = (PtrVT == MVT::i64);
3984 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3985 
3986 TargetLowering::ArgListTy Args;
3987 TargetLowering::ArgListEntry Entry;
3988 
3989 Entry.Ty = IntPtrTy;
3990 Entry.Node = Trmp; Args.push_back(Entry);
3991
3992 // TrampSize == (isPPC64 ? 48 : 40);
3993 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3994 isPPC64 ? MVT::i64 : MVT::i32);
3995 Args.push_back(Entry);
3996
3997 Entry.Node = FPtr; Args.push_back(Entry);
3998 Entry.Node = Nest; Args.push_back(Entry);
3999
4000 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4001 TargetLowering::CallLoweringInfo CLI(DAG);
4002 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4003 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4004 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4005
4006 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4007 return CallResult.second;
4008}
4009
4010 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4011 MachineFunction &MF = DAG.getMachineFunction();
4012 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4013 EVT PtrVT = getPointerTy(MF.getDataLayout());
4014
4015 SDLoc dl(Op);
4016
4017 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4018 // vastart just stores the address of the VarArgsFrameIndex slot into the
4019 // memory location argument.
4020 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4021 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4022 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4023 MachinePointerInfo(SV));
4024 }
4025
4026 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4027 // We suppose the given va_list is already allocated.
4028 //
4029 // typedef struct {
4030 // char gpr; /* index into the array of 8 GPRs
4031 // * stored in the register save area
4032 // * gpr=0 corresponds to r3,
4033 // * gpr=1 to r4, etc.
4034 // */
4035 // char fpr; /* index into the array of 8 FPRs
4036 // * stored in the register save area
4037 // * fpr=0 corresponds to f1,
4038 // * fpr=1 to f2, etc.
4039 // */
4040 // char *overflow_arg_area;
4041 // /* location on stack that holds
4042 // * the next overflow argument
4043 // */
4044 // char *reg_save_area;
4045 // /* where r3:r10 and f1:f8 (if saved)
4046 // * are stored
4047 // */
4048 // } va_list[1];
4049
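// The four stores below fill in these fields in order: the gpr count byte,
// the fpr count byte, the overflow_arg_area pointer and the reg_save_area
// pointer.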
4050 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4051 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4052 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4053 PtrVT);
4054 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4055 PtrVT);
4056
4057 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4058 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4059
4060 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4061 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4062
4063 uint64_t FPROffset = 1;
4064 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4065
4066 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4067
4068 // Store first byte : number of int regs
4069 SDValue firstStore =
4070 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4071 MachinePointerInfo(SV), MVT::i8);
4072 uint64_t nextOffset = FPROffset;
4073 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4074 ConstFPROffset);
4075
4076 // Store second byte : number of float regs
4077 SDValue secondStore =
4078 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4079 MachinePointerInfo(SV, nextOffset), MVT::i8);
4080 nextOffset += StackOffset;
4081 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4082
4083 // Store second word : arguments given on stack
4084 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4085 MachinePointerInfo(SV, nextOffset));
4086 nextOffset += FrameOffset;
4087 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4088
4089 // Store third word : arguments given in registers
4090 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4091 MachinePointerInfo(SV, nextOffset));
4092}
4093
4094/// FPR - The set of FP registers that should be allocated for arguments
4095/// on Darwin and AIX.
4096static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4097 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4098 PPC::F11, PPC::F12, PPC::F13};
4099
4100/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4101/// the stack.
4102static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4103 unsigned PtrByteSize) {
4104 unsigned ArgSize = ArgVT.getStoreSize();
4105 if (Flags.isByVal())
4106 ArgSize = Flags.getByValSize();
4107
4108 // Round up to multiples of the pointer size, except for array members,
4109 // which are always packed.
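// For example, with 8-byte pointers a 13-byte byval argument reserves 16 bytes.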
4110 if (!Flags.isInConsecutiveRegs())
4111 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4112
4113 return ArgSize;
4114}
4115
4116/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4117/// on the stack.
4118 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4119 ISD::ArgFlagsTy Flags,
4120 unsigned PtrByteSize) {
4121 Align Alignment(PtrByteSize);
4122
4123 // Altivec parameters are padded to a 16 byte boundary.
4124 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4125 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4126 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4127 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4128 Alignment = Align(16);
4129
4130 // ByVal parameters are aligned as requested.
4131 if (Flags.isByVal()) {
4132 auto BVAlign = Flags.getNonZeroByValAlign();
4133 if (BVAlign > PtrByteSize) {
4134 if (BVAlign.value() % PtrByteSize != 0)
4135 report_fatal_error(
4136 "ByVal alignment is not a multiple of the pointer size");
4137
4138 Alignment = BVAlign;
4139 }
4140 }
4141
4142 // Array members are always packed to their original alignment.
4143 if (Flags.isInConsecutiveRegs()) {
4144 // If the array member was split into multiple registers, the first
4145 // needs to be aligned to the size of the full type. (Except for
4146 // ppcf128, which is only aligned as its f64 components.)
4147 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4148 Alignment = Align(OrigVT.getStoreSize());
4149 else
4150 Alignment = Align(ArgVT.getStoreSize());
4151 }
4152
4153 return Alignment;
4154}
4155
4156/// CalculateStackSlotUsed - Return whether this argument will use its
4157/// stack slot (instead of being passed in registers). ArgOffset,
4158/// AvailableFPRs, and AvailableVRs must hold the current argument
4159/// position, and will be updated to account for this argument.
4160static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4161 unsigned PtrByteSize, unsigned LinkageSize,
4162 unsigned ParamAreaSize, unsigned &ArgOffset,
4163 unsigned &AvailableFPRs,
4164 unsigned &AvailableVRs) {
4165 bool UseMemory = false;
4166
4167 // Respect alignment of argument on the stack.
4168 Align Alignment =
4169 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4170 ArgOffset = alignTo(ArgOffset, Alignment);
4171 // If there's no space left in the argument save area, we must
4172 // use memory (this check also catches zero-sized arguments).
4173 if (ArgOffset >= LinkageSize + ParamAreaSize)
4174 UseMemory = true;
4175
4176 // Allocate argument on the stack.
4177 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4178 if (Flags.isInConsecutiveRegsLast())
4179 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4180 // If we overran the argument save area, we must use memory
4181 // (this check catches arguments passed partially in memory)
4182 if (ArgOffset > LinkageSize + ParamAreaSize)
4183 UseMemory = true;
4184
4185 // However, if the argument is actually passed in an FPR or a VR,
4186 // we don't use memory after all.
4187 if (!Flags.isByVal()) {
4188 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4189 if (AvailableFPRs > 0) {
4190 --AvailableFPRs;
4191 return false;
4192 }
4193 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4194 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4195 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4196 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4197 if (AvailableVRs > 0) {
4198 --AvailableVRs;
4199 return false;
4200 }
4201 }
4202
4203 return UseMemory;
4204}
4205
4206/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4207/// ensure minimum alignment required for target.
4208 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4209 unsigned NumBytes) {
4210 return alignTo(NumBytes, Lowering->getStackAlign());
4211}
4212
4213SDValue PPCTargetLowering::LowerFormalArguments(
4214 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4215 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4216 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4217 if (Subtarget.isAIXABI())
4218 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4219 InVals);
4220 if (Subtarget.is64BitELFABI())
4221 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4222 InVals);
4223 assert(Subtarget.is32BitELFABI());
4224 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4225 InVals);
4226}
4227
4228SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4229 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4230 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4231 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4232
4233 // 32-bit SVR4 ABI Stack Frame Layout:
4234 // +-----------------------------------+
4235 // +--> | Back chain |
4236 // | +-----------------------------------+
4237 // | | Floating-point register save area |
4238 // | +-----------------------------------+
4239 // | | General register save area |
4240 // | +-----------------------------------+
4241 // | | CR save word |
4242 // | +-----------------------------------+
4243 // | | VRSAVE save word |
4244 // | +-----------------------------------+
4245 // | | Alignment padding |
4246 // | +-----------------------------------+
4247 // | | Vector register save area |
4248 // | +-----------------------------------+
4249 // | | Local variable space |
4250 // | +-----------------------------------+
4251 // | | Parameter list area |
4252 // | +-----------------------------------+
4253 // | | LR save word |
4254 // | +-----------------------------------+
4255 // SP--> +--- | Back chain |
4256 // +-----------------------------------+
4257 //
4258 // Specifications:
4259 // System V Application Binary Interface PowerPC Processor Supplement
4260 // AltiVec Technology Programming Interface Manual
4261
4262 MachineFunction &MF = DAG.getMachineFunction();
4263 MachineFrameInfo &MFI = MF.getFrameInfo();
4264 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4265
4266 EVT PtrVT = getPointerTy(MF.getDataLayout());
4267 // Potential tail calls could cause overwriting of argument stack slots.
4268 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4269 (CallConv == CallingConv::Fast));
4270 const Align PtrAlign(4);
4271
4272 // Assign locations to all of the incoming arguments.
4274 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4275 *DAG.getContext());
4276
4277 // Reserve space for the linkage area on the stack.
4278 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4279 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4280 if (useSoftFloat())
4281 CCInfo.PreAnalyzeFormalArguments(Ins);
4282
4283 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4284 CCInfo.clearWasPPCF128();
4285
4286 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4287 CCValAssign &VA = ArgLocs[i];
4288
4289 // Arguments stored in registers.
4290 if (VA.isRegLoc()) {
4291 const TargetRegisterClass *RC;
4292 EVT ValVT = VA.getValVT();
4293
4294 switch (ValVT.getSimpleVT().SimpleTy) {
4295 default:
4296 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4297 case MVT::i1:
4298 case MVT::i32:
4299 RC = &PPC::GPRCRegClass;
4300 break;
4301 case MVT::f32:
4302 if (Subtarget.hasP8Vector())
4303 RC = &PPC::VSSRCRegClass;
4304 else if (Subtarget.hasSPE())
4305 RC = &PPC::GPRCRegClass;
4306 else
4307 RC = &PPC::F4RCRegClass;
4308 break;
4309 case MVT::f64:
4310 if (Subtarget.hasVSX())
4311 RC = &PPC::VSFRCRegClass;
4312 else if (Subtarget.hasSPE())
4313 // SPE passes doubles in GPR pairs.
4314 RC = &PPC::GPRCRegClass;
4315 else
4316 RC = &PPC::F8RCRegClass;
4317 break;
4318 case MVT::v16i8:
4319 case MVT::v8i16:
4320 case MVT::v4i32:
4321 RC = &PPC::VRRCRegClass;
4322 break;
4323 case MVT::v4f32:
4324 RC = &PPC::VRRCRegClass;
4325 break;
4326 case MVT::v2f64:
4327 case MVT::v2i64:
4328 RC = &PPC::VRRCRegClass;
4329 break;
4330 }
4331
4332 SDValue ArgValue;
4333 // Transform the arguments stored in physical registers into
4334 // virtual ones.
4335 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4336 assert(i + 1 < e && "No second half of double precision argument");
4337 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4338 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4339 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4340 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4341 if (!Subtarget.isLittleEndian())
4342 std::swap (ArgValueLo, ArgValueHi);
4343 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4344 ArgValueHi);
4345 } else {
4346 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4347 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4348 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4349 if (ValVT == MVT::i1)
4350 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4351 }
4352
4353 InVals.push_back(ArgValue);
4354 } else {
4355 // Argument stored in memory.
4356 assert(VA.isMemLoc());
4357
4358 // Get the extended size of the argument type on the stack
4359 unsigned ArgSize = VA.getLocVT().getStoreSize();
4360 // Get the actual size of the argument type
4361 unsigned ObjSize = VA.getValVT().getStoreSize();
4362 unsigned ArgOffset = VA.getLocMemOffset();
4363 // Stack objects in PPC32 are right justified.
4364 ArgOffset += ArgSize - ObjSize;
4365 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4366
4367 // Create load nodes to retrieve arguments from the stack.
4368 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4369 InVals.push_back(
4370 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4371 }
4372 }
4373
4374 // Assign locations to all of the incoming aggregate by value arguments.
4375 // Aggregates passed by value are stored in the local variable space of the
4376 // caller's stack frame, right above the parameter list area.
4377 SmallVector<CCValAssign, 16> ByValArgLocs;
4378 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4379 ByValArgLocs, *DAG.getContext());
4380
4381 // Reserve stack space for the allocations in CCInfo.
4382 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4383
4384 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4385
4386 // Area that is at least reserved in the caller of this function.
4387 unsigned MinReservedArea = CCByValInfo.getStackSize();
4388 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4389
4390 // Set the size that is at least reserved in caller of this function. Tail
4391 // call optimized function's reserved stack space needs to be aligned so that
4392 // taking the difference between two stack areas will result in an aligned
4393 // stack.
4394 MinReservedArea =
4395 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4396 FuncInfo->setMinReservedArea(MinReservedArea);
4397 
4398 SmallVector<SDValue, 8> MemOps;
4399 
4400 // If the function takes a variable number of arguments, make a frame index for
4401 // the start of the first vararg value... for expansion of llvm.va_start.
4402 if (isVarArg) {
4403 static const MCPhysReg GPArgRegs[] = {
4404 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4405 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4406 };
4407 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4408
4409 static const MCPhysReg FPArgRegs[] = {
4410 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4411 PPC::F8
4412 };
4413 unsigned NumFPArgRegs = std::size(FPArgRegs);
4414
4415 if (useSoftFloat() || hasSPE())
4416 NumFPArgRegs = 0;
4417
4418 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4419 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4420
4421 // Make room for NumGPArgRegs and NumFPArgRegs.
4422 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4423 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4424
4425 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4426 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4427
4428 FuncInfo->setVarArgsFrameIndex(
4429 MFI.CreateStackObject(Depth, Align(8), false));
4430 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4431
4432 // The fixed integer arguments of a variadic function are stored to the
4433 // VarArgsFrameIndex on the stack so that they may be loaded by
4434 // dereferencing the result of va_next.
4435 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4436 // Get an existing live-in vreg, or add a new one.
4437 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4438 if (!VReg)
4439 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4440
4441 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4442 SDValue Store =
4443 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4444 MemOps.push_back(Store);
4445 // Increment the address by four for the next argument to store
4446 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4447 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4448 }
4449
4450 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4451 // is set.
4452 // The double arguments are stored to the VarArgsFrameIndex
4453 // on the stack.
4454 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4455 // Get an existing live-in vreg, or add a new one.
4456 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4457 if (!VReg)
4458 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4459
4460 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4461 SDValue Store =
4462 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4463 MemOps.push_back(Store);
4464 // Increment the address by eight for the next argument to store
4465 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4466 PtrVT);
4467 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4468 }
4469 }
4470
4471 if (!MemOps.empty())
4472 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4473
4474 return Chain;
4475}
4476
4477// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4478// value to MVT::i64 and then truncate to the correct register size.
4479SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4480 EVT ObjectVT, SelectionDAG &DAG,
4481 SDValue ArgVal,
4482 const SDLoc &dl) const {
4483 if (Flags.isSExt())
4484 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4485 DAG.getValueType(ObjectVT));
4486 else if (Flags.isZExt())
4487 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4488 DAG.getValueType(ObjectVT));
4489
4490 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4491}
4492
4493SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4494 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4495 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4496 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4497 // TODO: add description of PPC stack frame format, or at least some docs.
4498 //
4499 bool isELFv2ABI = Subtarget.isELFv2ABI();
4500 bool isLittleEndian = Subtarget.isLittleEndian();
4501 MachineFunction &MF = DAG.getMachineFunction();
4502 MachineFrameInfo &MFI = MF.getFrameInfo();
4503 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4504
4505 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4506 "fastcc not supported on varargs functions");
4507
4508 EVT PtrVT = getPointerTy(MF.getDataLayout());
4509 // Potential tail calls could cause overwriting of argument stack slots.
4510 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4511 (CallConv == CallingConv::Fast));
4512 unsigned PtrByteSize = 8;
4513 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4514
4515 static const MCPhysReg GPR[] = {
4516 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4517 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4518 };
4519 static const MCPhysReg VR[] = {
4520 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4521 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4522 };
4523
4524 const unsigned Num_GPR_Regs = std::size(GPR);
4525 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4526 const unsigned Num_VR_Regs = std::size(VR);
4527
4528 // Do a first pass over the arguments to determine whether the ABI
4529 // guarantees that our caller has allocated the parameter save area
4530 // on its stack frame. In the ELFv1 ABI, this is always the case;
4531 // in the ELFv2 ABI, it is true if this is a vararg function or if
4532 // any parameter is located in a stack slot.
4533
4534 bool HasParameterArea = !isELFv2ABI || isVarArg;
4535 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4536 unsigned NumBytes = LinkageSize;
4537 unsigned AvailableFPRs = Num_FPR_Regs;
4538 unsigned AvailableVRs = Num_VR_Regs;
4539 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4540 if (Ins[i].Flags.isNest())
4541 continue;
4542
4543 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4544 PtrByteSize, LinkageSize, ParamAreaSize,
4545 NumBytes, AvailableFPRs, AvailableVRs))
4546 HasParameterArea = true;
4547 }
4548
4549 // Add DAG nodes to load the arguments or copy them out of registers. On
4550 // entry to a function on PPC, the arguments start after the linkage area,
4551 // although the first ones are often in registers.
4552
4553 unsigned ArgOffset = LinkageSize;
4554 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4555 SmallVector<SDValue, 8> MemOps;
4556 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4557 unsigned CurArgIdx = 0;
4558 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4559 SDValue ArgVal;
4560 bool needsLoad = false;
4561 EVT ObjectVT = Ins[ArgNo].VT;
4562 EVT OrigVT = Ins[ArgNo].ArgVT;
4563 unsigned ObjSize = ObjectVT.getStoreSize();
4564 unsigned ArgSize = ObjSize;
4565 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4566 if (Ins[ArgNo].isOrigArg()) {
4567 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4568 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4569 }
4570 // We re-align the argument offset for each argument, except when using the
4571 // fast calling convention, in which case we only do so when the argument
4572 // will actually use a stack slot.
4573 unsigned CurArgOffset;
4574 Align Alignment;
4575 auto ComputeArgOffset = [&]() {
4576 /* Respect alignment of argument on the stack. */
4577 Alignment =
4578 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4579 ArgOffset = alignTo(ArgOffset, Alignment);
4580 CurArgOffset = ArgOffset;
4581 };
4582
4583 if (CallConv != CallingConv::Fast) {
4584 ComputeArgOffset();
4585
4586 /* Compute GPR index associated with argument offset. */
4587 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4588 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4589 }
4590
4591 // FIXME the codegen can be much improved in some cases.
4592 // We do not have to keep everything in memory.
4593 if (Flags.isByVal()) {
4594 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4595
4596 if (CallConv == CallingConv::Fast)
4597 ComputeArgOffset();
4598
4599 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
4600 ObjSize = Flags.getByValSize();
4601 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4602 // Empty aggregate parameters do not take up registers. Examples:
4603 // struct { } a;
4604 // union { } b;
4605 // int c[0];
4606 // etc. However, we have to provide a place-holder in InVals, so
4607 // pretend we have an 8-byte item at the current address for that
4608 // purpose.
4609 if (!ObjSize) {
4610 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4611 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4612 InVals.push_back(FIN);
4613 continue;
4614 }
4615
4616 // Create a stack object covering all stack doublewords occupied
4617 // by the argument. If the argument is (fully or partially) on
4618 // the stack, or if the argument is fully in registers but the
4619 // caller has allocated the parameter save anyway, we can refer
4620 // directly to the caller's stack frame. Otherwise, create a
4621 // local copy in our own frame.
4622 int FI;
4623 if (HasParameterArea ||
4624 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4625 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4626 else
4627 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4628 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4629
4630 // Handle aggregates smaller than 8 bytes.
4631 if (ObjSize < PtrByteSize) {
4632 // The value of the object is its address, which differs from the
4633 // address of the enclosing doubleword on big-endian systems.
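// For example, a 3-byte aggregate on a big-endian target occupies the last 3
// bytes of its doubleword, so its address is the slot address plus 5.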
4634 SDValue Arg = FIN;
4635 if (!isLittleEndian) {
4636 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4637 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4638 }
4639 InVals.push_back(Arg);
4640
4641 if (GPR_idx != Num_GPR_Regs) {
4642 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4643 FuncInfo->addLiveInAttr(VReg, Flags);
4644 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4645 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4646 SDValue Store =
4647 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4648 MachinePointerInfo(&*FuncArg), ObjType);
4649 MemOps.push_back(Store);
4650 }
4651 // Whether we copied from a register or not, advance the offset
4652 // into the parameter save area by a full doubleword.
4653 ArgOffset += PtrByteSize;
4654 continue;
4655 }
4656
4657 // The value of the object is its address, which is the address of
4658 // its first stack doubleword.
4659 InVals.push_back(FIN);
4660
4661 // Store whatever pieces of the object are in registers to memory.
4662 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4663 if (GPR_idx == Num_GPR_Regs)
4664 break;
4665
4666 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4667 FuncInfo->addLiveInAttr(VReg, Flags);
4668 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4669 SDValue Addr = FIN;
4670 if (j) {
4671 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4672 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4673 }
4674 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4675 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4676 SDValue Store =
4677 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4678 MachinePointerInfo(&*FuncArg, j), ObjType);
4679 MemOps.push_back(Store);
4680 ++GPR_idx;
4681 }
4682 ArgOffset += ArgSize;
4683 continue;
4684 }
4685
4686 switch (ObjectVT.getSimpleVT().SimpleTy) {
4687 default: llvm_unreachable("Unhandled argument type!");
4688 case MVT::i1:
4689 case MVT::i32:
4690 case MVT::i64:
4691 if (Flags.isNest()) {
4692 // The 'nest' parameter, if any, is passed in R11.
4693 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4694 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4695
4696 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4697 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4698
4699 break;
4700 }
4701
4702 // These can be scalar arguments or elements of an integer array type
4703 // passed directly. Clang may use those instead of "byval" aggregate
4704 // types to avoid forcing arguments to memory unnecessarily.
4705 if (GPR_idx != Num_GPR_Regs) {
4706 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4707 FuncInfo->addLiveInAttr(VReg, Flags);
4708 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4709
4710 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4711 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4712 // value to MVT::i64 and then truncate to the correct register size.
4713 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4714 } else {
4715 if (CallConv == CallingConv::Fast)
4716 ComputeArgOffset();
4717
4718 needsLoad = true;
4719 ArgSize = PtrByteSize;
4720 }
4721 if (CallConv != CallingConv::Fast || needsLoad)
4722 ArgOffset += 8;
4723 break;
4724
4725 case MVT::f32:
4726 case MVT::f64:
4727 // These can be scalar arguments or elements of a float array type
4728 // passed directly. The latter are used to implement ELFv2 homogeneous
4729 // float aggregates.
4730 if (FPR_idx != Num_FPR_Regs) {
4731 unsigned VReg;
4732
4733 if (ObjectVT == MVT::f32)
4734 VReg = MF.addLiveIn(FPR[FPR_idx],
4735 Subtarget.hasP8Vector()
4736 ? &PPC::VSSRCRegClass
4737 : &PPC::F4RCRegClass);
4738 else
4739 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4740 ? &PPC::VSFRCRegClass
4741 : &PPC::F8RCRegClass);
4742
4743 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4744 ++FPR_idx;
4745 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4746 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4747 // once we support fp <-> gpr moves.
4748
4749 // This can only ever happen in the presence of f32 array types,
4750 // since otherwise we never run out of FPRs before running out
4751 // of GPRs.
4752 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4753 FuncInfo->addLiveInAttr(VReg, Flags);
4754 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4755
4756 if (ObjectVT == MVT::f32) {
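// Depending on endianness and which word of the doubleword slot holds the
// float, the value may sit in the upper 32 bits of the GPR; shift it down
// before truncating.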
4757 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4758 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4759 DAG.getConstant(32, dl, MVT::i32));
4760 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4761 }
4762
4763 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4764 } else {
4765 if (CallConv == CallingConv::Fast)
4766 ComputeArgOffset();
4767
4768 needsLoad = true;
4769 }
4770
4771 // When passing an array of floats, the array occupies consecutive
4772 // space in the argument area; only round up to the next doubleword
4773 // at the end of the array. Otherwise, each float takes 8 bytes.
4774 if (CallConv != CallingConv::Fast || needsLoad) {
4775 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4776 ArgOffset += ArgSize;
4777 if (Flags.isInConsecutiveRegsLast())
4778 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4779 }
4780 break;
4781 case MVT::v4f32:
4782 case MVT::v4i32:
4783 case MVT::v8i16:
4784 case MVT::v16i8:
4785 case MVT::v2f64:
4786 case MVT::v2i64:
4787 case MVT::v1i128:
4788 case MVT::f128:
4789 // These can be scalar arguments or elements of a vector array type
4790 // passed directly. The latter are used to implement ELFv2 homogeneous
4791 // vector aggregates.
4792 if (VR_idx != Num_VR_Regs) {
4793 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4794 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4795 ++VR_idx;
4796 } else {
4797 if (CallConv == CallingConv::Fast)
4798 ComputeArgOffset();
4799 needsLoad = true;
4800 }
4801 if (CallConv != CallingConv::Fast || needsLoad)
4802 ArgOffset += 16;
4803 break;
4804 }
4805
4806 // We need to load the argument to a virtual register if we determined
4807 // above that we ran out of physical registers of the appropriate type.
4808 if (needsLoad) {
4809 if (ObjSize < ArgSize && !isLittleEndian)
4810 CurArgOffset += ArgSize - ObjSize;
4811 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4812 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4813 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4814 }
4815
4816 InVals.push_back(ArgVal);
4817 }
4818
4819 // Area that is at least reserved in the caller of this function.
4820 unsigned MinReservedArea;
4821 if (HasParameterArea)
4822 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4823 else
4824 MinReservedArea = LinkageSize;
4825
4826 // Set the size that is at least reserved in caller of this function. Tail
4827 // call optimized functions' reserved stack space needs to be aligned so that
4828 // taking the difference between two stack areas will result in an aligned
4829 // stack.
4830 MinReservedArea =
4831 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4832 FuncInfo->setMinReservedArea(MinReservedArea);
4833
4834 // If the function takes a variable number of arguments, make a frame index for
4835 // the start of the first vararg value... for expansion of llvm.va_start.
4836 // As the ELFv2 ABI spec puts it:
4837 // C programs that are intended to be *portable* across different compilers
4838 // and architectures must use the header file <stdarg.h> to deal with variable
4839 // argument lists.
4840 if (isVarArg && MFI.hasVAStart()) {
4841 int Depth = ArgOffset;
4842
4843 FuncInfo->setVarArgsFrameIndex(
4844 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4845 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4846
4847 // If this function is vararg, store any remaining integer argument regs
4848 // to their spots on the stack so that they may be loaded by dereferencing
4849 // the result of va_next.
4850 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4851 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4852 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4853 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4854 SDValue Store =
4855 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4856 MemOps.push_back(Store);
4857 // Increment the address by PtrByteSize for the next argument to store
4858 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4859 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4860 }
4861 }
4862
4863 if (!MemOps.empty())
4864 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4865
4866 return Chain;
4867}
4868
4869/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4870/// adjusted to accommodate the arguments for the tailcall.
4871static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4872 unsigned ParamSize) {
4873
4874 if (!isTailCall) return 0;
4875 
4876 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4877 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4878 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
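// A negative SPDiff means the callee needs a larger parameter area than the
// caller reserved, so the stack has to be extended before the tail call.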
4879 // Remember only if the new adjustment is bigger.
4880 if (SPDiff < FI->getTailCallSPDelta())
4881 FI->setTailCallSPDelta(SPDiff);
4882
4883 return SPDiff;
4884}
4885
4886static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4887
4888static bool callsShareTOCBase(const Function *Caller,
4889 const GlobalValue *CalleeGV,
4890 const TargetMachine &TM) {
4891 // It does not make sense to call callsShareTOCBase() with a caller that
4892 // is PC Relative since PC Relative callers do not have a TOC.
4893#ifndef NDEBUG
4894 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4895 assert(!STICaller->isUsingPCRelativeCalls() &&
4896 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4897#endif
4898
4899 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4900 // don't have enough information to determine if the caller and callee share
4901 // the same TOC base, so we have to pessimistically assume they don't for
4902 // correctness.
4903 if (!CalleeGV)
4904 return false;
4905
4906 // If the callee is preemptable, then the static linker will use a plt-stub
4907 // which saves the toc to the stack, and needs a nop after the call
4908 // instruction to convert to a toc-restore.
4909 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4910 return false;
4911
4912 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4913 // We may need a TOC restore in the situation where the caller requires a
4914 // valid TOC but the callee is PC Relative and does not.
4915 const Function *F = dyn_cast<Function>(CalleeGV);
4916 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4917
4918 // If we have an Alias we can try to get the function from there.
4919 if (Alias) {
4920 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4921 F = dyn_cast<Function>(GlobalObj);
4922 }
4923
4924 // If we still have no valid function pointer we do not have enough
4925 // information to determine if the callee uses PC Relative calls so we must
4926 // assume that it does.
4927 if (!F)
4928 return false;
4929
4930 // If the callee uses PC Relative we cannot guarantee that the callee won't
4931 // clobber the TOC of the caller and so we must assume that the two
4932 // functions do not share a TOC base.
4933 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4934 if (STICallee->isUsingPCRelativeCalls())
4935 return false;
4936
4937 // If the GV is not a strong definition then we need to assume it can be
4938 // replaced by another function at link time. The function that replaces
4939 // it may not share the same TOC as the caller since the callee may be
4940 // replaced by a PC Relative version of the same function.
4941 if (!CalleeGV->isStrongDefinitionForLinker())
4942 return false;
4943
4944 // The medium and large code models are expected to provide a sufficiently
4945 // large TOC to cover all data addressing needs of a module with a
4946 // single TOC.
4947 if (CodeModel::Medium == TM.getCodeModel() ||
4948 CodeModel::Large == TM.getCodeModel())
4949 return true;
4950
4951 // Any explicitly-specified sections and section prefixes must also match.
4952 // Also, if we're using -ffunction-sections, then each function is always in
4953 // a different section (the same is true for COMDAT functions).
4954 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4955 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4956 return false;
4957 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4958 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4959 return false;
4960 }
4961
4962 return true;
4963}
4964
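// Returns true if any outgoing argument to a 64-bit ELF callee would have to
// be passed in the parameter save area on the stack, using the same
// bookkeeping as the formal-argument lowering above.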
4965static bool
4966 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4967 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4968 assert(Subtarget.is64BitELFABI());
4969
4970 const unsigned PtrByteSize = 8;
4971 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4972
4973 static const MCPhysReg GPR[] = {
4974 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4975 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4976 };
4977 static const MCPhysReg VR[] = {
4978 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4979 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4980 };
4981
4982 const unsigned NumGPRs = std::size(GPR);
4983 const unsigned NumFPRs = 13;
4984 const unsigned NumVRs = std::size(VR);
4985 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4986
4987 unsigned NumBytes = LinkageSize;
4988 unsigned AvailableFPRs = NumFPRs;
4989 unsigned AvailableVRs = NumVRs;
4990
4991 for (const ISD::OutputArg& Param : Outs) {
4992 if (Param.Flags.isNest()) continue;
4993
4994 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4995 LinkageSize, ParamAreaSize, NumBytes,
4996 AvailableFPRs, AvailableVRs))
4997 return true;
4998 }
4999 return false;
5000}
5001
5002static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5003 if (CB.arg_size() != CallerFn->arg_size())
5004 return false;
5005
5006 auto CalleeArgIter = CB.arg_begin();
5007 auto CalleeArgEnd = CB.arg_end();
5008 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5009
5010 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5011 const Value* CalleeArg = *CalleeArgIter;
5012 const Value* CallerArg = &(*CallerArgIter);
5013 if (CalleeArg == CallerArg)
5014 continue;
5015
5016 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5017 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5018 // }
5019 // 1st argument of callee is undef and has the same type as caller.
5020 if (CalleeArg->getType() == CallerArg->getType() &&
5021 isa<UndefValue>(CalleeArg))
5022 continue;
5023
5024 return false;
5025 }
5026
5027 return true;
5028}
5029
5030 // Returns true if TCO is possible between the caller's and callee's
5031 // calling conventions.
5032static bool
5033 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5034 CallingConv::ID CalleeCC) {
5035 // Tail calls are possible with fastcc and ccc.
5036 auto isTailCallableCC = [] (CallingConv::ID CC){
5037 return CC == CallingConv::C || CC == CallingConv::Fast;
5038 };
5039 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5040 return false;
5041
5042 // We can safely tail call both fastcc and ccc callees from a c calling
5043 // convention caller. If the caller is fastcc, we may have less stack space
5044 // than a non-fastcc caller with the same signature so disable tail-calls in
5045 // that case.
5046 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5047}
5048
5049bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5050 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5051 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5052 const SmallVectorImpl<ISD::OutputArg> &Outs,
5053 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5054 bool isCalleeExternalSymbol) const {
5055 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5056
5057 if (DisableSCO && !TailCallOpt) return false;
5058
5059 // Variadic argument functions are not supported.
5060 if (isVarArg) return false;
5061
5062 // Check that the calling conventions are compatible for tco.
5063 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5064 return false;
5065
5066 // A caller that contains any byval parameter is not supported.
5067 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5068 return false;
5069
5070 // Likewise, a callee that contains any byval parameter is not supported.
5071 // Note: This is a quick work around, because in some cases, e.g.
5072 // caller's stack size > callee's stack size, we are still able to apply
5073 // sibling call optimization. For example, gcc is able to do SCO for caller1
5074 // in the following example, but not for caller2.
5075 // struct test {
5076 // long int a;
5077 // char ary[56];
5078 // } gTest;
5079 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5080 // b->a = v.a;
5081 // return 0;
5082 // }
5083 // void caller1(struct test a, struct test c, struct test *b) {
5084 // callee(gTest, b); }
5085 // void caller2(struct test *b) { callee(gTest, b); }
5086 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5087 return false;
5088
5089 // If callee and caller use different calling conventions, we cannot pass
5090 // parameters on stack since offsets for the parameter area may be different.
5091 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5092 return false;
5093
5094 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5095 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5096 // callee potentially have different TOC bases then we cannot tail call since
5097 // we need to restore the TOC pointer after the call.
5098 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5099 // We cannot guarantee this for indirect calls or calls to external functions.
5100 // When PC-Relative addressing is used, the concept of the TOC is no longer
5101 // applicable so this check is not required.
5102 // Check first for indirect calls.
5103 if (!Subtarget.isUsingPCRelativeCalls() &&
5104 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5105 return false;
5106
5107 // Check if we share the TOC base.
5108 if (!Subtarget.isUsingPCRelativeCalls() &&
5109 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5110 return false;
5111
5112 // TCO allows altering callee ABI, so we don't have to check further.
5113 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5114 return true;
5115
5116 if (DisableSCO) return false;
5117
5118 // If the callee uses the same argument list that the caller is using, then we
5119 // can apply SCO in this case. If it does not, then we need to check whether the
5120 // callee needs stack space for passing arguments.
5121 // PC Relative tail calls may not have a CallBase.
5122 // If there is no CallBase we cannot verify if we have the same argument
5123 // list so assume that we don't have the same argument list.
5124 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5125 needStackSlotPassParameters(Subtarget, Outs))
5126 return false;
5127 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5128 return false;
5129
5130 return true;
5131}
5132
5133/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5134/// for tail call optimization. Targets which want to do tail call
5135/// optimization should implement this function.
5136bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5137 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5138 CallingConv::ID CallerCC, bool isVarArg,
5139 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5140 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5141 return false;
5142
5143 // Variable argument functions are not supported.
5144 if (isVarArg)
5145 return false;
5146
5147 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5148 // Functions containing by val parameters are not supported.
5149 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5150 return false;
5151
5152 // Non-PIC/GOT tail calls are supported.
5153 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5154 return true;
5155
5156 // At the moment we can only do local tail calls (in same module, hidden
5157 // or protected) if we are generating PIC.
5158 if (CalleeGV)
5159 return CalleeGV->hasHiddenVisibility() ||
5160 CalleeGV->hasProtectedVisibility();
5161 }
5162
5163 return false;
5164}
5165
5166/// isCallCompatibleAddress - Return the immediate to use if the specified
5167/// 32-bit value is representable in the immediate field of a BxA instruction.
5169 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5170 if (!C) return nullptr;
5171
5172 int Addr = C->getZExtValue();
5173 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5174 SignExtend32<26>(Addr) != Addr)
5175 return nullptr; // Top 6 bits have to be sext of immediate.
5176
5177 return DAG
5178 .getConstant(
5179 (int)C->getZExtValue() >> 2, SDLoc(Op),
5180 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5181 .getNode();
5182}
5183
5184namespace {
5185
5186struct TailCallArgumentInfo {
5187 SDValue Arg;
5188 SDValue FrameIdxOp;
5189 int FrameIdx = 0;
5190
5191 TailCallArgumentInfo() = default;
5192};
5193
5194} // end anonymous namespace
5195
5196/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5197 static void StoreTailCallArgumentsToStackSlot(
5198 SelectionDAG &DAG, SDValue Chain,
5199 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5200 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5201 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5202 SDValue Arg = TailCallArgs[i].Arg;
5203 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5204 int FI = TailCallArgs[i].FrameIdx;
5205 // Store relative to framepointer.
5206 MemOpChains.push_back(DAG.getStore(
5207 Chain, dl, Arg, FIN,
5208 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5209 }
5210}
5211
5212/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5213/// the appropriate stack slot for the tail call optimized function call.
5214 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5215 SDValue OldRetAddr, SDValue OldFP,
5216 int SPDiff, const SDLoc &dl) {
5217 if (SPDiff) {
5218 // Calculate the new stack slot for the return address.
5219 MachineFunction &MF = DAG.getMachineFunction();
5220 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5221 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5222 bool isPPC64 = Subtarget.isPPC64();
5223 int SlotSize = isPPC64 ? 8 : 4;
5224 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5225 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5226 NewRetAddrLoc, true);
5227 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5228 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5229 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5230 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5231 }
5232 return Chain;
5233}
5234
5235/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5236/// the position of the argument.
5237static void
5238 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5239 SDValue Arg, int SPDiff, unsigned ArgOffset,
5240 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5241 int Offset = ArgOffset + SPDiff;
5242 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5243 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5244 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5245 SDValue FIN = DAG.getFrameIndex(FI, VT);
5246 TailCallArgumentInfo Info;
5247 Info.Arg = Arg;
5248 Info.FrameIdxOp = FIN;
5249 Info.FrameIdx = FI;
5250 TailCallArguments.push_back(Info);
5251}
5252
5253/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5254/// stack slot. Returns the chain as result and the loaded frame pointers in
5255/// LROpOut/FPOpout. Used when tail calling.
5256SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5257 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5258 SDValue &FPOpOut, const SDLoc &dl) const {
5259 if (SPDiff) {
5260 // Load the LR and FP stack slot for later adjusting.
5261 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5262 LROpOut = getReturnAddrFrameIndex(DAG);
5263 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5264 Chain = SDValue(LROpOut.getNode(), 1);
5265 }
5266 return Chain;
5267}
5268
5269/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5270/// by "Src" to address "Dst" of size "Size". Alignment information is
5271/// specified by the specific parameter attribute. The copy will be passed as
5272/// a byval function parameter.
5273/// Sometimes what we are copying is the end of a larger object, the part that
5274/// does not fit in registers.
5275 static SDValue CreateCopyOfByValArgument(SDValue Arg, SDValue Dst,
5276 SDValue Chain, ISD::ArgFlagsTy Flags,
5277 SelectionDAG &DAG, const SDLoc &dl) {
5278 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5279 return DAG.getMemcpy(
5280 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5281 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5282}
5283
5284/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5285/// tail calls.
5286 static void LowerMemOpCallTo(
5287 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5288 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5289 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5290 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5291 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5292 if (!isTailCall) {
5293 if (isVector) {
5294 SDValue StackPtr;
5295 if (isPPC64)
5296 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5297 else
5298 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5299 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5300 DAG.getConstant(ArgOffset, dl, PtrVT));
5301 }
5302 MemOpChains.push_back(
5303 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5304 // Calculate and remember argument location.
5305 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5306 TailCallArguments);
5307}
5308
5309static void
5310 PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5311 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5312 SDValue FPOp,
5313 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5314 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5315 // might overwrite each other in case of tail call optimization.
5316 SmallVector<SDValue, 8> MemOpChains2;
5317 // Do not flag preceding copytoreg stuff together with the following stuff.
5318 InGlue = SDValue();
5319 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5320 MemOpChains2, dl);
5321 if (!MemOpChains2.empty())
5322 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5323
5324 // Store the return address to the appropriate stack slot.
5325 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5326
5327 // Emit callseq_end just before tailcall node.
5328 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5329 InGlue = Chain.getValue(1);
5330}
5331
5332// Is this global address that of a function that can be called by name? (as
5333// opposed to something that must hold a descriptor for an indirect call).
5334static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5335 if (GV) {
5336 if (GV->isThreadLocal())
5337 return false;
5338
5339 return GV->getValueType()->isFunctionTy();
5340 }
5341
5342 return false;
5343}
5344
5345SDValue PPCTargetLowering::LowerCallResult(
5346 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5347 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5348 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5349 SmallVector<CCValAssign, 16> RVLocs;
5350 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5351 *DAG.getContext());
5352
5353 CCRetInfo.AnalyzeCallResult(
5354 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5355 ? RetCC_PPC_Cold
5356 : RetCC_PPC);
5357
5358 // Copy all of the result registers out of their specified physreg.
5359 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5360 CCValAssign &VA = RVLocs[i];
5361 assert(VA.isRegLoc() && "Can only return in registers!");
5362
5363 SDValue Val;
5364
5365 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5366 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5367 InGlue);
5368 Chain = Lo.getValue(1);
5369 InGlue = Lo.getValue(2);
5370 VA = RVLocs[++i]; // skip ahead to next loc
5371 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5372 InGlue);
5373 Chain = Hi.getValue(1);
5374 InGlue = Hi.getValue(2);
5375 if (!Subtarget.isLittleEndian())
5376 std::swap (Lo, Hi);
5377 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5378 } else {
5379 Val = DAG.getCopyFromReg(Chain, dl,
5380 VA.getLocReg(), VA.getLocVT(), InGlue);
5381 Chain = Val.getValue(1);
5382 InGlue = Val.getValue(2);
5383 }
5384
5385 switch (VA.getLocInfo()) {
5386 default: llvm_unreachable("Unknown loc info!");
5387 case CCValAssign::Full: break;
5388 case CCValAssign::AExt:
5389 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5390 break;
5391 case CCValAssign::ZExt:
5392 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5393 DAG.getValueType(VA.getValVT()));
5394 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5395 break;
5396 case CCValAssign::SExt:
5397 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5398 DAG.getValueType(VA.getValVT()));
5399 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5400 break;
5401 }
5402
5403 InVals.push_back(Val);
5404 }
5405
5406 return Chain;
5407}
5408
5409static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5410 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5411 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5412 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5413
5414 // PatchPoint calls are not indirect.
5415 if (isPatchPoint)
5416 return false;
5417
5418 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5419 return false;
5420
5421 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5422 // because the immediate function pointer points to a descriptor instead of
5423 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5424 // pointer immediate points to the global entry point, while the BLA would
5425 // need to jump to the local entry point (see rL211174).
5426 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5427 isBLACompatibleAddress(Callee, DAG))
5428 return false;
5429
5430 return true;
5431}
5432
5433// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5434static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5435 return Subtarget.isAIXABI() ||
5436 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5437}
5438
5439 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5440 const Function &Caller, const SDValue &Callee,
5441 const PPCSubtarget &Subtarget,
5442 const TargetMachine &TM,
5443 bool IsStrictFPCall = false) {
5444 if (CFlags.IsTailCall)
5445 return PPCISD::TC_RETURN;
5446
5447 unsigned RetOpc = 0;
5448 // This is a call through a function pointer.
5449 if (CFlags.IsIndirect) {
5450 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5451 // indirect calls. The save of the caller's TOC pointer to the stack will be
5452 // inserted into the DAG as part of call lowering. The restore of the TOC
5453 // pointer is modeled by using a pseudo instruction for the call opcode that
5454 // represents the 2 instruction sequence of an indirect branch and link,
5455 // immediately followed by a load of the TOC pointer from the stack save
5456 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5457 // as it is not saved or used.
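// As an illustrative sketch (not emitted verbatim by this function), the pseudo
// expands to roughly:
//   bctrl                            ; indirect branch and link through CTR
//   ld r2, <TOC save offset>(r1)     ; reload the caller's TOC pointer
// where the offset is the ABI's TOC save slot (see getTOCSaveOffset()).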
5458 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5459 : PPCISD::BCTRL;
5460 } else if (Subtarget.isUsingPCRelativeCalls()) {
5461 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5462 RetOpc = PPCISD::CALL_NOTOC;
5463 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5464 // The ABIs that maintain a TOC pointer across calls need to have a nop
5465 // immediately following the call instruction if the caller and callee may
5466 // have different TOC bases. At link time if the linker determines the calls
5467 // may not share a TOC base, the call is redirected to a trampoline inserted
5468 // by the linker. The trampoline will (among other things) save the caller's
5469 // TOC pointer at an ABI designated offset in the linkage area and the
5470 // linker will rewrite the nop to be a load of the TOC pointer from the
5471 // linkage area into gpr2.
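// Illustrative call-site sequence under this scheme (a sketch, not code
// generated directly here):
//   bl callee
//   nop        ; may be rewritten by the linker to: ld r2, <TOC save offset>(r1)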
5472 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5473 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5474 RetOpc =
5475 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5476 } else
5477 RetOpc = PPCISD::CALL;
5478 if (IsStrictFPCall) {
5479 switch (RetOpc) {
5480 default:
5481 llvm_unreachable("Unknown call opcode");
5482 case PPCISD::BCTRL_LOAD_TOC:
5483 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5484 break;
5485 case PPCISD::BCTRL:
5486 RetOpc = PPCISD::BCTRL_RM;
5487 break;
5488 case PPCISD::CALL_NOTOC:
5489 RetOpc = PPCISD::CALL_NOTOC_RM;
5490 break;
5491 case PPCISD::CALL:
5492 RetOpc = PPCISD::CALL_RM;
5493 break;
5494 case PPCISD::CALL_NOP:
5495 RetOpc = PPCISD::CALL_NOP_RM;
5496 break;
5497 }
5498 }
5499 return RetOpc;
5500}
5501
5502static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5503 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5504 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5505 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5506 return SDValue(Dest, 0);
5507
5508 // Returns true if the callee is local, and false otherwise.
5509 auto isLocalCallee = [&]() {
5510 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5511 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5512
5513 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5514 !isa_and_nonnull<GlobalIFunc>(GV);
5515 };
5516
5517 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5518 // a static relocation model causes some versions of GNU LD (2.17.50, at
5519 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5520 // built with secure-PLT.
5521 bool UsePlt =
5522 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5523 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5524
5525 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5526 const TargetMachine &TM = Subtarget.getTargetMachine();
5527 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5528 MCSymbolXCOFF *S =
5529 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5530
5532 return DAG.getMCSymbol(S, PtrVT);
5533 };
5534
5535 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5536 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5537 if (isFunctionGlobalAddress(GV)) {
5538 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5539
5540 if (Subtarget.isAIXABI()) {
5541 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5542 return getAIXFuncEntryPointSymbolSDNode(GV);
5543 }
5544 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5545 UsePlt ? PPCII::MO_PLT : 0);
5546 }
5547
5548 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5549 const char *SymName = S->getSymbol();
5550 if (Subtarget.isAIXABI()) {
5551 // If there exists a user-declared function whose name is the same as the
5552 // ExternalSymbol's, then we pick up the user-declared version.
5553 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5554 if (const Function *F =
5555 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5556 return getAIXFuncEntryPointSymbolSDNode(F);
5557
5558 // On AIX, direct function calls reference the symbol for the function's
5559 // entry point, which is named by prepending a "." before the function's
5560 // C-linkage name. A Qualname is returned here because an external
5561 // function entry point is a csect with XTY_ER property.
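// For example (illustrative): a direct call to a function named "foo" on AIX
// references the entry-point symbol ".foo" rather than "foo" itself.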
5562 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5563 auto &Context = DAG.getMachineFunction().getContext();
5564 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5565 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5567 return Sec->getQualNameSymbol();
5568 };
5569
5570 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5571 }
5572 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5573 UsePlt ? PPCII::MO_PLT : 0);
5574 }
5575
5576 // No transformation needed.
5577 assert(Callee.getNode() && "What no callee?");
5578 return Callee;
5579}
5580
5581 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5582 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5583 "Expected a CALLSEQ_STARTSDNode.");
5584
5585 // The last operand is the chain, except when the node has glue. If the node
5586 // has glue, then the last operand is the glue, and the chain is the second
5587 // last operand.
5588 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5589 if (LastValue.getValueType() != MVT::Glue)
5590 return LastValue;
5591
5592 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5593}
5594
5595 // Creates the node that moves a function's address into the count register
5596// to prepare for an indirect call instruction.
5597static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5598 SDValue &Glue, SDValue &Chain,
5599 const SDLoc &dl) {
5600 SDValue MTCTROps[] = {Chain, Callee, Glue};
5601 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5602 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5603 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5604 // The glue is the second value produced.
5605 Glue = Chain.getValue(1);
5606}
5607
5608 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5609 SDValue &Glue, SDValue &Chain,
5610 SDValue CallSeqStart,
5611 const CallBase *CB, const SDLoc &dl,
5612 bool hasNest,
5613 const PPCSubtarget &Subtarget) {
5614 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5615 // entry point, but to the function descriptor (the function entry point
5616 // address is part of the function descriptor though).
5617 // The function descriptor is a three doubleword structure with the
5618 // following fields: function entry point, TOC base address and
5619 // environment pointer.
5620 // Thus for a call through a function pointer, the following actions need
5621 // to be performed:
5622 // 1. Save the TOC of the caller in the TOC save area of its stack
5623 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5624 // 2. Load the address of the function entry point from the function
5625 // descriptor.
5626 // 3. Load the TOC of the callee from the function descriptor into r2.
5627 // 4. Load the environment pointer from the function descriptor into
5628 // r11.
5629 // 5. Branch to the function entry point address.
5630 // 6. On return of the callee, the TOC of the caller needs to be
5631 // restored (this is done in FinishCall()).
5632 //
5633 // The loads are scheduled at the beginning of the call sequence, and the
5634 // register copies are flagged together to ensure that no other
5635 // operations can be scheduled in between. E.g. without flagging the
5636 // copies together, a TOC access in the caller could be scheduled between
5637 // the assignment of the callee TOC and the branch to the callee, which leads
5638 // to incorrect code.
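// Sketch of the descriptor layout assumed here (illustrative, 64-bit ELFv1;
// the struct name is for exposition only):
//   struct FunctionDescriptor {
//     uint64_t EntryPoint; // offset 0, loaded below as LoadFuncPtr
//     uint64_t TOCBase;    // offset descriptorTOCAnchorOffset()
//     uint64_t EnvPtr;     // offset descriptorEnvironmentPointerOffset()
//   };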
5639
5640 // Start by loading the function address from the descriptor.
5641 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5642 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5643 ? (MachineMemOperand::MODereferenceable |
5644 MachineMemOperand::MOInvariant)
5645 : MachineMemOperand::MONone;
5646
5647 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5648
5649 // Registers used in building the DAG.
5650 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5651 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5652
5653 // Offsets of descriptor members.
5654 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5655 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5656
5657 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5658 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5659
5660 // One load for the function's entry point address.
5661 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5662 Alignment, MMOFlags);
5663
5664 // One for loading the TOC anchor for the module that contains the called
5665 // function.
5666 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5667 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5668 SDValue TOCPtr =
5669 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5670 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5671
5672 // One for loading the environment pointer.
5673 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5674 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5675 SDValue LoadEnvPtr =
5676 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5677 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5678
5679
5680 // Then copy the newly loaded TOC anchor to the TOC pointer.
5681 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5682 Chain = TOCVal.getValue(0);
5683 Glue = TOCVal.getValue(1);
5684
5685 // If the function call has an explicit 'nest' parameter, it takes the
5686 // place of the environment pointer.
5687 assert((!hasNest || !Subtarget.isAIXABI()) &&
5688 "Nest parameter is not supported on AIX.");
5689 if (!hasNest) {
5690 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5691 Chain = EnvVal.getValue(0);
5692 Glue = EnvVal.getValue(1);
5693 }
5694
5695 // The rest of the indirect call sequence is the same as the non-descriptor
5696 // DAG.
5697 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5698}
5699
5700static void
5701 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5702 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5703 SelectionDAG &DAG,
5704 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5705 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5706 const PPCSubtarget &Subtarget) {
5707 const bool IsPPC64 = Subtarget.isPPC64();
5708 // MVT for a general purpose register.
5709 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5710
5711 // First operand is always the chain.
5712 Ops.push_back(Chain);
5713
5714 // If it's a direct call pass the callee as the second operand.
5715 if (!CFlags.IsIndirect)
5716 Ops.push_back(Callee);
5717 else {
5718 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5719
5720 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5721 // on the stack (this would have been done in `LowerCall_64SVR4` or
5722 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5723 // represents both the indirect branch and a load that restores the TOC
5724 // pointer from the linkage area. The operand for the TOC restore is an add
5725 // of the TOC save offset to the stack pointer. This must be the second
5726 // operand: after the chain input but before any other variadic arguments.
5727 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5728 // saved or used.
5729 if (isTOCSaveRestoreRequired(Subtarget)) {
5730 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5731
5732 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5733 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5734 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5735 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5736 Ops.push_back(AddTOC);
5737 }
5738
5739 // Add the register used for the environment pointer.
5740 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5741 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5742 RegVT));
5743
5744
5745 // Add CTR register as callee so a bctr can be emitted later.
5746 if (CFlags.IsTailCall)
5747 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5748 }
5749
5750 // If this is a tail call add stack pointer delta.
5751 if (CFlags.IsTailCall)
5752 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5753
5754 // Add argument registers to the end of the list so that they are known live
5755 // into the call.
5756 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5757 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5758 RegsToPass[i].second.getValueType()));
5759
5760 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5761 // no way to mark dependencies as implicit here.
5762 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5763 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5764 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5765 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5766
5767 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5768 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5769 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5770
5771 // Add a register mask operand representing the call-preserved registers.
5772 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5773 const uint32_t *Mask =
5774 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5775 assert(Mask && "Missing call preserved mask for calling convention");
5776 Ops.push_back(DAG.getRegisterMask(Mask));
5777
5778 // If the glue is valid, it is the last operand.
5779 if (Glue.getNode())
5780 Ops.push_back(Glue);
5781}
5782
5783SDValue PPCTargetLowering::FinishCall(
5784 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5785 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5786 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5787 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5788 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5789
5790 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5791 Subtarget.isAIXABI())
5792 setUsesTOCBasePtr(DAG);
5793
5794 unsigned CallOpc =
5795 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5796 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5797
5798 if (!CFlags.IsIndirect)
5799 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5800 else if (Subtarget.usesFunctionDescriptors())
5801 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5802 dl, CFlags.HasNest, Subtarget);
5803 else
5804 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5805
5806 // Build the operand list for the call instruction.
5807 SmallVector<SDValue, 8> Ops;
5808 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5809 SPDiff, Subtarget);
5810
5811 // Emit tail call.
5812 if (CFlags.IsTailCall) {
5813 // Indirect tail call when using PC Relative calls do not have the same
5814 // constraints.
5815 assert(((Callee.getOpcode() == ISD::Register &&
5816 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5817 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5818 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5819 isa<ConstantSDNode>(Callee) ||
5820 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5821 "Expecting a global address, external symbol, absolute value, "
5822 "register or an indirect tail call when PC Relative calls are "
5823 "used.");
5824 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5825 assert(CallOpc == PPCISD::TC_RETURN &&
5826 "Unexpected call opcode for a tail call.");
5827 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5828 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5829 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5830 return Ret;
5831 }
5832
5833 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5834 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5835 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5836 Glue = Chain.getValue(1);
5837
5838 // When performing tail call optimization the callee pops its arguments off
5839 // the stack. Account for this here so these bytes can be pushed back on in
5840 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5841 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5842 getTargetMachine().Options.GuaranteedTailCallOpt)
5843 ? NumBytes
5844 : 0;
5845
5846 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5847 Glue = Chain.getValue(1);
5848
5849 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5850 DAG, InVals);
5851}
5852
5853 bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5854 CallingConv::ID CalleeCC = CB->getCallingConv();
5855 const Function *CallerFunc = CB->getCaller();
5856 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5857 const Function *CalleeFunc = CB->getCalledFunction();
5858 if (!CalleeFunc)
5859 return false;
5860 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5861
5862 SmallVector<ISD::OutputArg, 2> Outs;
5863 SmallVector<ISD::InputArg, 2> Ins;
5864
5865 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5866 CalleeFunc->getAttributes(), Outs, *this,
5867 CalleeFunc->getDataLayout());
5868
5869 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5870 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5871 false /*isCalleeExternalSymbol*/);
5872}
5873
5874bool PPCTargetLowering::isEligibleForTCO(
5875 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5876 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5877 const SmallVectorImpl<ISD::OutputArg> &Outs,
5878 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5879 bool isCalleeExternalSymbol) const {
5880 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5881 return false;
5882
5883 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5884 return IsEligibleForTailCallOptimization_64SVR4(
5885 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5886 isCalleeExternalSymbol);
5887 else
5888 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5889 isVarArg, Ins);
5890}
5891
5892SDValue
5893PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5894 SmallVectorImpl<SDValue> &InVals) const {
5895 SelectionDAG &DAG = CLI.DAG;
5896 SDLoc &dl = CLI.DL;
5897 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5898 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5899 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5900 SDValue Chain = CLI.Chain;
5901 SDValue Callee = CLI.Callee;
5902 bool &isTailCall = CLI.IsTailCall;
5903 CallingConv::ID CallConv = CLI.CallConv;
5904 bool isVarArg = CLI.IsVarArg;
5905 bool isPatchPoint = CLI.IsPatchPoint;
5906 const CallBase *CB = CLI.CB;
5907
5908 if (isTailCall) {
5909 MachineFunction &MF = DAG.getMachineFunction();
5910 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5911 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5912 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5913 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5914
5915 isTailCall =
5916 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5917 &(MF.getFunction()), IsCalleeExternalSymbol);
5918 if (isTailCall) {
5919 ++NumTailCalls;
5920 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5921 ++NumSiblingCalls;
5922
5923 // PC Relative calls no longer guarantee that the callee is a Global
5924 // Address Node. The callee could be an indirect tail call in which
5925 // case the SDValue for the callee could be a load (to load the address
5926 // of a function pointer) or it may be a register copy (to move the
5927 // address of the callee from a function parameter into a virtual
5928 // register). It may also be an ExternalSymbolSDNode (e.g. for memcpy).
5929 assert((Subtarget.isUsingPCRelativeCalls() ||
5930 isa<GlobalAddressSDNode>(Callee)) &&
5931 "Callee should be an llvm::Function object.");
5932
5933 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5934 << "\nTCO callee: ");
5935 LLVM_DEBUG(Callee.dump());
5936 }
5937 }
5938
5939 if (!isTailCall && CB && CB->isMustTailCall())
5940 report_fatal_error("failed to perform tail call elimination on a call "
5941 "site marked musttail");
5942
5943 // When long calls (i.e. indirect calls) are always used, calls are always
5944 // made via function pointer. If we have a function name, first translate it
5945 // into a pointer.
5946 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5947 !isTailCall)
5948 Callee = LowerGlobalAddress(Callee, DAG);
5949
5950 CallFlags CFlags(
5951 CallConv, isTailCall, isVarArg, isPatchPoint,
5952 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5953 // hasNest
5954 Subtarget.is64BitELFABI() &&
5955 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5956 CLI.NoMerge);
5957
5958 if (Subtarget.isAIXABI())
5959 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5960 InVals, CB);
5961
5962 assert(Subtarget.isSVR4ABI());
5963 if (Subtarget.isPPC64())
5964 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5965 InVals, CB);
5966 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5967 InVals, CB);
5968}
5969
5970SDValue PPCTargetLowering::LowerCall_32SVR4(
5971 SDValue Chain, SDValue Callee, CallFlags CFlags,
5973 const SmallVectorImpl<SDValue> &OutVals,
5974 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5976 const CallBase *CB) const {
5977 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5978 // of the 32-bit SVR4 ABI stack frame layout.
5979
5980 const CallingConv::ID CallConv = CFlags.CallConv;
5981 const bool IsVarArg = CFlags.IsVarArg;
5982 const bool IsTailCall = CFlags.IsTailCall;
5983
5984 assert((CallConv == CallingConv::C ||
5985 CallConv == CallingConv::Cold ||
5986 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5987
5988 const Align PtrAlign(4);
5989
5990 MachineFunction &MF = DAG.getMachineFunction();
5991
5992 // Mark this function as potentially containing a function that contains a
5993 // tail call. As a consequence the frame pointer will be used for dynamic
5994 // allocation and for restoring the caller's stack pointer in this function's
5995 // epilog. This is done because the tail-called function might overwrite the
5996 // value in this function's (MF) stack pointer stack slot 0(SP).
5997 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5998 CallConv == CallingConv::Fast)
5999 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6000
6001 // Count how many bytes are to be pushed on the stack, including the linkage
6002 // area, parameter list area and the part of the local variable space which
6003 // contains copies of aggregates which are passed by value.
6004
6005 // Assign locations to all of the outgoing arguments.
6006 SmallVector<CCValAssign, 16> ArgLocs;
6007 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6008
6009 // Reserve space for the linkage area on the stack.
6010 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6011 PtrAlign);
6012 if (useSoftFloat())
6013 CCInfo.PreAnalyzeCallOperands(Outs);
6014
6015 if (IsVarArg) {
6016 // Handle fixed and variable vector arguments differently.
6017 // Fixed vector arguments go into registers as long as registers are
6018 // available. Variable vector arguments always go into memory.
6019 unsigned NumArgs = Outs.size();
6020
6021 for (unsigned i = 0; i != NumArgs; ++i) {
6022 MVT ArgVT = Outs[i].VT;
6023 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6024 bool Result;
6025
6026 if (Outs[i].IsFixed) {
6027 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6028 CCInfo);
6029 } else {
6030 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
6031 ArgFlags, CCInfo);
6032 }
6033
6034 if (Result) {
6035#ifndef NDEBUG
6036 errs() << "Call operand #" << i << " has unhandled type "
6037 << ArgVT << "\n";
6038#endif
6039 llvm_unreachable(nullptr);
6040 }
6041 }
6042 } else {
6043 // All arguments are treated the same.
6044 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6045 }
6046 CCInfo.clearWasPPCF128();
6047
6048 // Assign locations to all of the outgoing aggregate by value arguments.
6049 SmallVector<CCValAssign, 16> ByValArgLocs;
6050 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6051
6052 // Reserve stack space for the allocations in CCInfo.
6053 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6054
6055 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6056
6057 // Size of the linkage area, parameter list area and the part of the local
6058 // space variable where copies of aggregates which are passed by value are
6059 // stored.
6060 unsigned NumBytes = CCByValInfo.getStackSize();
6061
6062 // Calculate by how many bytes the stack has to be adjusted in case of tail
6063 // call optimization.
6064 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6065
6066 // Adjust the stack pointer for the new arguments...
6067 // These operations are automatically eliminated by the prolog/epilog pass
6068 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6069 SDValue CallSeqStart = Chain;
6070
6071 // Load the return address and frame pointer so they can be moved somewhere else
6072 // later.
6073 SDValue LROp, FPOp;
6074 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6075
6076 // Set up a copy of the stack pointer for use loading and storing any
6077 // arguments that may not fit in the registers available for argument
6078 // passing.
6079 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6080
6081 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6082 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6083 SmallVector<SDValue, 8> MemOpChains;
6084
6085 bool seenFloatArg = false;
6086 // Walk the register/memloc assignments, inserting copies/loads.
6087 // i - Tracks the index into the list of registers allocated for the call
6088 // RealArgIdx - Tracks the index into the list of actual function arguments
6089 // j - Tracks the index into the list of byval arguments
6090 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6091 i != e;
6092 ++i, ++RealArgIdx) {
6093 CCValAssign &VA = ArgLocs[i];
6094 SDValue Arg = OutVals[RealArgIdx];
6095 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6096
6097 if (Flags.isByVal()) {
6098 // Argument is an aggregate which is passed by value, thus we need to
6099 // create a copy of it in the local variable space of the current stack
6100 // frame (which is the stack frame of the caller) and pass the address of
6101 // this copy to the callee.
6102 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6103 CCValAssign &ByValVA = ByValArgLocs[j++];
6104 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6105
6106 // Memory reserved in the local variable space of the callers stack frame.
6107 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6108
6109 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6110 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6111 StackPtr, PtrOff);
6112
6113 // Create a copy of the argument in the local area of the current
6114 // stack frame.
6115 SDValue MemcpyCall =
6116 CreateCopyOfByValArgument(Arg, PtrOff,
6117 CallSeqStart.getNode()->getOperand(0),
6118 Flags, DAG, dl);
6119
6120 // This must go outside the CALLSEQ_START..END.
6121 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6122 SDLoc(MemcpyCall));
6123 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6124 NewCallSeqStart.getNode());
6125 Chain = CallSeqStart = NewCallSeqStart;
6126
6127 // Pass the address of the aggregate copy on the stack either in a
6128 // physical register or in the parameter list area of the current stack
6129 // frame to the callee.
6130 Arg = PtrOff;
6131 }
6132
6133 // When useCRBits() is true, there can be i1 arguments.
6134 // It is because getRegisterType(MVT::i1) => MVT::i1,
6135 // and for other integer types getRegisterType() => MVT::i32.
6136 // Extend i1 and ensure callee will get i32.
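// For example (illustrative): an i1 argument marked signext becomes an i32 of
// 0 or -1, while zeroext yields 0 or 1, so the callee always sees a full i32.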
6137 if (Arg.getValueType() == MVT::i1)
6138 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6139 dl, MVT::i32, Arg);
6140
6141 if (VA.isRegLoc()) {
6142 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6143 // Put argument in a physical register.
6144 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6145 bool IsLE = Subtarget.isLittleEndian();
6146 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6147 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6148 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6149 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6150 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6151 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6152 SVal.getValue(0)));
6153 } else
6154 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6155 } else {
6156 // Put argument in the parameter list area of the current stack frame.
6157 assert(VA.isMemLoc());
6158 unsigned LocMemOffset = VA.getLocMemOffset();
6159
6160 if (!IsTailCall) {
6161 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6162 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6163 StackPtr, PtrOff);
6164
6165 MemOpChains.push_back(
6166 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6167 } else {
6168 // Calculate and remember argument location.
6169 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6170 TailCallArguments);
6171 }
6172 }
6173 }
6174
6175 if (!MemOpChains.empty())
6176 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6177
6178 // Build a sequence of copy-to-reg nodes chained together with token chain
6179 // and flag operands which copy the outgoing args into the appropriate regs.
6180 SDValue InGlue;
6181 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6182 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6183 RegsToPass[i].second, InGlue);
6184 InGlue = Chain.getValue(1);
6185 }
6186
6187 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6188 // registers.
6189 if (IsVarArg) {
6190 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6191 SDValue Ops[] = { Chain, InGlue };
6192
6193 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6194 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6195
6196 InGlue = Chain.getValue(1);
6197 }
6198
6199 if (IsTailCall)
6200 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6201 TailCallArguments);
6202
6203 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6204 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6205}
6206
6207// Copy an argument into memory, being careful to do this outside the
6208// call sequence for the call to which the argument belongs.
6209SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6210 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6211 SelectionDAG &DAG, const SDLoc &dl) const {
6212 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6213 CallSeqStart.getNode()->getOperand(0),
6214 Flags, DAG, dl);
6215 // The MEMCPY must go outside the CALLSEQ_START..END.
6216 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6217 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6218 SDLoc(MemcpyCall));
6219 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6220 NewCallSeqStart.getNode());
6221 return NewCallSeqStart;
6222}
6223
6224SDValue PPCTargetLowering::LowerCall_64SVR4(
6225 SDValue Chain, SDValue Callee, CallFlags CFlags,
6227 const SmallVectorImpl<SDValue> &OutVals,
6228 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6230 const CallBase *CB) const {
6231 bool isELFv2ABI = Subtarget.isELFv2ABI();
6232 bool isLittleEndian = Subtarget.isLittleEndian();
6233 unsigned NumOps = Outs.size();
6234 bool IsSibCall = false;
6235 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6236
6237 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6238 unsigned PtrByteSize = 8;
6239
6240 MachineFunction &MF = DAG.getMachineFunction();
6241
6242 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6243 IsSibCall = true;
6244
6245 // Mark this function as potentially containing a function that contains a
6246 // tail call. As a consequence the frame pointer will be used for dynamic
6247 // allocation and for restoring the caller's stack pointer in this function's
6248 // epilog. This is done because the tail-called function might overwrite the
6249 // value in this function's (MF) stack pointer stack slot 0(SP).
6250 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6251 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6252
6253 assert(!(IsFastCall && CFlags.IsVarArg) &&
6254 "fastcc not supported on varargs functions");
6255
6256 // Count how many bytes are to be pushed on the stack, including the linkage
6257 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6258 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6259 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
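// Illustrative linkage-area layouts (byte offsets; a sketch, not consulted by
// the code below):
//   ELFv1: 0 back chain, 8 CR save, 16 LR save, 24/32 reserved, 40 TOC save
//   ELFv2: 0 back chain, 8 CR save, 16 LR save, 24 TOC save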
6260 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6261 unsigned NumBytes = LinkageSize;
6262 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6263
6264 static const MCPhysReg GPR[] = {
6265 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6266 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6267 };
6268 static const MCPhysReg VR[] = {
6269 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6270 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6271 };
6272
6273 const unsigned NumGPRs = std::size(GPR);
6274 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6275 const unsigned NumVRs = std::size(VR);
6276
6277 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6278 // can be passed to the callee in registers.
6279 // For the fast calling convention, there is another check below.
6280 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
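// E.g. (illustrative): an ELFv2 call whose arguments all fit in r3-r10, f1-f13
// and v2-v13 needs no parameter save area at all; a vararg call or any argument
// that spills to the stack forces one to be allocated.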
6281 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6282 if (!HasParameterArea) {
6283 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6284 unsigned AvailableFPRs = NumFPRs;
6285 unsigned AvailableVRs = NumVRs;
6286 unsigned NumBytesTmp = NumBytes;
6287 for (unsigned i = 0; i != NumOps; ++i) {
6288 if (Outs[i].Flags.isNest()) continue;
6289 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6290 PtrByteSize, LinkageSize, ParamAreaSize,
6291 NumBytesTmp, AvailableFPRs, AvailableVRs))
6292 HasParameterArea = true;
6293 }
6294 }
6295
6296 // When using the fast calling convention, we don't provide backing for
6297 // arguments that will be in registers.
6298 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6299
6300 // Avoid allocating parameter area for fastcc functions if all the arguments
6301 // can be passed in the registers.
6302 if (IsFastCall)
6303 HasParameterArea = false;
6304
6305 // Add up all the space actually used.
6306 for (unsigned i = 0; i != NumOps; ++i) {
6307 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6308 EVT ArgVT = Outs[i].VT;
6309 EVT OrigVT = Outs[i].ArgVT;
6310
6311 if (Flags.isNest())
6312 continue;
6313
6314 if (IsFastCall) {
6315 if (Flags.isByVal()) {
6316 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6317 if (NumGPRsUsed > NumGPRs)
6318 HasParameterArea = true;
6319 } else {
6320 switch (ArgVT.getSimpleVT().SimpleTy) {
6321 default: llvm_unreachable("Unexpected ValueType for argument!");
6322 case MVT::i1:
6323 case MVT::i32:
6324 case MVT::i64:
6325 if (++NumGPRsUsed <= NumGPRs)
6326 continue;
6327 break;
6328 case MVT::v4i32:
6329 case MVT::v8i16:
6330 case MVT::v16i8:
6331 case MVT::v2f64:
6332 case MVT::v2i64:
6333 case MVT::v1i128:
6334 case MVT::f128:
6335 if (++NumVRsUsed <= NumVRs)
6336 continue;
6337 break;
6338 case MVT::v4f32:
6339 if (++NumVRsUsed <= NumVRs)
6340 continue;
6341 break;
6342 case MVT::f32:
6343 case MVT::f64:
6344 if (++NumFPRsUsed <= NumFPRs)
6345 continue;
6346 break;
6347 }
6348 HasParameterArea = true;
6349 }
6350 }
6351
6352 /* Respect alignment of argument on the stack. */
6353 auto Alignment =
6354 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6355 NumBytes = alignTo(NumBytes, Alignment);
6356
6357 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6358 if (Flags.isInConsecutiveRegsLast())
6359 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6360 }
6361
6362 unsigned NumBytesActuallyUsed = NumBytes;
6363
6364 // In the old ELFv1 ABI,
6365 // the prolog code of the callee may store up to 8 GPR argument registers to
6366 // the stack, allowing va_start to index over them in memory if it is varargs.
6367 // Because we cannot tell if this is needed on the caller side, we have to
6368 // conservatively assume that it is needed. As such, make sure we have at
6369 // least enough stack space for the caller to store the 8 GPRs.
6370 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6371 // really requires memory operands, e.g. a vararg function.
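// Worked example (illustrative): on ELFv1 this floor is the 48-byte linkage
// area plus 8 * PtrByteSize = 64 bytes of GPR backing store, i.e. 112 bytes.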
6372 if (HasParameterArea)
6373 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6374 else
6375 NumBytes = LinkageSize;
6376
6377 // Tail call needs the stack to be aligned.
6378 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6379 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6380
6381 int SPDiff = 0;
6382
6383 // Calculate by how many bytes the stack has to be adjusted in case of tail
6384 // call optimization.
6385 if (!IsSibCall)
6386 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6387
6388 // To protect arguments on the stack from being clobbered in a tail call,
6389 // force all the loads to happen before doing any other lowering.
6390 if (CFlags.IsTailCall)
6391 Chain = DAG.getStackArgumentTokenFactor(Chain);
6392
6393 // Adjust the stack pointer for the new arguments...
6394 // These operations are automatically eliminated by the prolog/epilog pass
6395 if (!IsSibCall)
6396 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6397 SDValue CallSeqStart = Chain;
6398
6399 // Load the return address and frame pointer so they can be moved somewhere else
6400 // later.
6401 SDValue LROp, FPOp;
6402 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6403
6404 // Set up a copy of the stack pointer for use loading and storing any
6405 // arguments that may not fit in the registers available for argument
6406 // passing.
6407 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6408
6409 // Figure out which arguments are going to go in registers, and which in
6410 // memory. Also, if this is a vararg function, floating point arguments
6411 // must be stored to our stack, and loaded into integer regs as well, if
6412 // any integer regs are available for argument passing.
6413 unsigned ArgOffset = LinkageSize;
6414
6415 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6416 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6417
6418 SmallVector<SDValue, 8> MemOpChains;
6419 for (unsigned i = 0; i != NumOps; ++i) {
6420 SDValue Arg = OutVals[i];
6421 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6422 EVT ArgVT = Outs[i].VT;
6423 EVT OrigVT = Outs[i].ArgVT;
6424
6425 // PtrOff will be used to store the current argument to the stack if a
6426 // register cannot be found for it.
6427 SDValue PtrOff;
6428
6429 // We re-align the argument offset for each argument, except when using the
6430 // fast calling convention, when we need to make sure we do that only when
6431 // we'll actually use a stack slot.
6432 auto ComputePtrOff = [&]() {
6433 /* Respect alignment of argument on the stack. */
6434 auto Alignment =
6435 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6436 ArgOffset = alignTo(ArgOffset, Alignment);
6437
6438 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6439
6440 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6441 };
6442
6443 if (!IsFastCall) {
6444 ComputePtrOff();
6445
6446 /* Compute GPR index associated with argument offset. */
6447 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6448 GPR_idx = std::min(GPR_idx, NumGPRs);
6449 }
6450
6451 // Promote integers to 64-bit values.
6452 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6453 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6454 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6455 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6456 }
6457
6458 // FIXME memcpy is used way more than necessary. Correctness first.
6459 // Note: "by value" is code for passing a structure by value, not
6460 // basic types.
6461 if (Flags.isByVal()) {
6462 // Note: Size includes alignment padding, so
6463 // struct x { short a; char b; }
6464 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6465 // These are the proper values we need for right-justifying the
6466 // aggregate in a parameter register.
6467 unsigned Size = Flags.getByValSize();
6468
6469 // An empty aggregate parameter takes up no storage and no
6470 // registers.
6471 if (Size == 0)
6472 continue;
6473
6474 if (IsFastCall)
6475 ComputePtrOff();
6476
6477 // All aggregates smaller than 8 bytes must be passed right-justified.
6478 if (Size==1 || Size==2 || Size==4) {
6479 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6480 if (GPR_idx != NumGPRs) {
6481 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6482 MachinePointerInfo(), VT);
6483 MemOpChains.push_back(Load.getValue(1));
6484 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6485
6486 ArgOffset += PtrByteSize;
6487 continue;
6488 }
6489 }
6490
6491 if (GPR_idx == NumGPRs && Size < 8) {
6492 SDValue AddPtr = PtrOff;
6493 if (!isLittleEndian) {
6494 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6495 PtrOff.getValueType());
6496 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6497 }
6498 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6499 CallSeqStart,
6500 Flags, DAG, dl);
6501 ArgOffset += PtrByteSize;
6502 continue;
6503 }
6504 // Copy the object to the parameter save area if it cannot be entirely passed
6505 // by registers.
6506 // FIXME: we only need to copy the parts which need to be passed in
6507 // parameter save area. For the parts passed by registers, we don't need
6508 // to copy them to the stack although we need to allocate space for them
6509 // in parameter save area.
6510 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6511 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6512 CallSeqStart,
6513 Flags, DAG, dl);
6514
6515 // When a register is available, pass a small aggregate right-justified.
6516 if (Size < 8 && GPR_idx != NumGPRs) {
6517 // The easiest way to get this right-justified in a register
6518 // is to copy the structure into the rightmost portion of a
6519 // local variable slot, then load the whole slot into the
6520 // register.
6521 // FIXME: The memcpy seems to produce pretty awful code for
6522 // small aggregates, particularly for packed ones.
6523 // FIXME: It would be preferable to use the slot in the
6524 // parameter save area instead of a new local variable.
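// Sketch (illustrative, big-endian): for a 3-byte aggregate the copy below is
// placed at PtrOff + (8 - 3), so loading the whole 8-byte slot leaves the
// aggregate right-justified in the register, as the ABI expects.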
6525 SDValue AddPtr = PtrOff;
6526 if (!isLittleEndian) {
6527 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6528 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6529 }
6530 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6531 CallSeqStart,
6532 Flags, DAG, dl);
6533
6534 // Load the slot into the register.
6535 SDValue Load =
6536 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6537 MemOpChains.push_back(Load.getValue(1));
6538 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6539
6540 // Done with this argument.
6541 ArgOffset += PtrByteSize;
6542 continue;
6543 }
6544
6545 // For aggregates larger than PtrByteSize, copy the pieces of the
6546 // object that fit into registers from the parameter save area.
6547 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6548 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6549 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6550 if (GPR_idx != NumGPRs) {
6551 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6552 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6553 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6554 MachinePointerInfo(), ObjType);
6555
6556 MemOpChains.push_back(Load.getValue(1));
6557 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6558 ArgOffset += PtrByteSize;
6559 } else {
6560 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6561 break;
6562 }
6563 }
6564 continue;
6565 }
6566
6567 switch (Arg.getSimpleValueType().SimpleTy) {
6568 default: llvm_unreachable("Unexpected ValueType for argument!");
6569 case MVT::i1:
6570 case MVT::i32:
6571 case MVT::i64:
6572 if (Flags.isNest()) {
6573 // The 'nest' parameter, if any, is passed in R11.
6574 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6575 break;
6576 }
6577
6578 // These can be scalar arguments or elements of an integer array type
6579 // passed directly. Clang may use those instead of "byval" aggregate
6580 // types to avoid forcing arguments to memory unnecessarily.
6581 if (GPR_idx != NumGPRs) {
6582 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6583 } else {
6584 if (IsFastCall)
6585 ComputePtrOff();
6586
6587 assert(HasParameterArea &&
6588 "Parameter area must exist to pass an argument in memory.");
6589 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6590 true, CFlags.IsTailCall, false, MemOpChains,
6591 TailCallArguments, dl);
6592 if (IsFastCall)
6593 ArgOffset += PtrByteSize;
6594 }
6595 if (!IsFastCall)
6596 ArgOffset += PtrByteSize;
6597 break;
6598 case MVT::f32:
6599 case MVT::f64: {
6600 // These can be scalar arguments or elements of a float array type
6601 // passed directly. The latter are used to implement ELFv2 homogenous
6602 // float aggregates.
6603
6604 // Named arguments go into FPRs first, and once they overflow, the
6605 // remaining arguments go into GPRs and then the parameter save area.
6606 // Unnamed arguments for vararg functions always go to GPRs and
6607 // then the parameter save area. For now, put all arguments to vararg
6608 // routines always in both locations (FPR *and* GPR or stack slot).
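// For example (illustrative): in a vararg call a double argument is copied into
// the next available FPR and, in parallel, into the corresponding GPR or stack
// doubleword so the callee can retrieve it from either location.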
6609 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6610 bool NeededLoad = false;
6611
6612 // First load the argument into the next available FPR.
6613 if (FPR_idx != NumFPRs)
6614 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6615
6616 // Next, load the argument into GPR or stack slot if needed.
6617 if (!NeedGPROrStack)
6618 ;
6619 else if (GPR_idx != NumGPRs && !IsFastCall) {
6620 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6621 // once we support fp <-> gpr moves.
6622
6623 // In the non-vararg case, this can only ever happen in the
6624 // presence of f32 array types, since otherwise we never run
6625 // out of FPRs before running out of GPRs.
6626 SDValue ArgVal;
6627
6628 // Double values are always passed in a single GPR.
6629 if (Arg.getValueType() != MVT::f32) {
6630 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6631
6632 // Non-array float values are extended and passed in a GPR.
6633 } else if (!Flags.isInConsecutiveRegs()) {
6634 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6635 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6636
6637 // If we have an array of floats, we collect every odd element
6638 // together with its predecessor into one GPR.
6639 } else if (ArgOffset % PtrByteSize != 0) {
6640 SDValue Lo, Hi;
6641 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6642 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6643 if (!isLittleEndian)
6644 std::swap(Lo, Hi);
6645 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6646
6647 // The final element, if even, goes into the first half of a GPR.
6648 } else if (Flags.isInConsecutiveRegsLast()) {
6649 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6650 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6651 if (!isLittleEndian)
6652 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6653 DAG.getConstant(32, dl, MVT::i32));
6654
6655 // Non-final even elements are skipped; they will be handled
6656 // together with the subsequent argument on the next go-around.
6657 } else
6658 ArgVal = SDValue();
6659
6660 if (ArgVal.getNode())
6661 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6662 } else {
6663 if (IsFastCall)
6664 ComputePtrOff();
6665
6666 // Single-precision floating-point values are mapped to the
6667 // second (rightmost) word of the stack doubleword.
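// For example, on a big-endian target a scalar f32 still occupies an 8-byte
// doubleword in the parameter save area, and the value itself is stored at
// slot offset + 4; the ConstFour adjustment below implements exactly that.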
6668 if (Arg.getValueType() == MVT::f32 &&
6669 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6670 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6671 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6672 }
6673
6674 assert(HasParameterArea &&
6675 "Parameter area must exist to pass an argument in memory.");
6676 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6677 true, CFlags.IsTailCall, false, MemOpChains,
6678 TailCallArguments, dl);
6679
6680 NeededLoad = true;
6681 }
6682 // When passing an array of floats, the array occupies consecutive
6683 // space in the argument area; only round up to the next doubleword
6684 // at the end of the array. Otherwise, each float takes 8 bytes.
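// For example, a homogeneous aggregate of five f32 elements occupies
// 5 * 4 = 20 bytes and is rounded up to 24 at its final element, whereas five
// independent f32 arguments would occupy 5 * 8 = 40 bytes.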
6685 if (!IsFastCall || NeededLoad) {
6686 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6687 Flags.isInConsecutiveRegs()) ? 4 : 8;
6688 if (Flags.isInConsecutiveRegsLast())
6689 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6690 }
6691 break;
6692 }
6693 case MVT::v4f32:
6694 case MVT::v4i32:
6695 case MVT::v8i16:
6696 case MVT::v16i8:
6697 case MVT::v2f64:
6698 case MVT::v2i64:
6699 case MVT::v1i128:
6700 case MVT::f128:
6701 // These can be scalar arguments or elements of a vector array type
6702 // passed directly. The latter are used to implement ELFv2 homogenous
6703 // vector aggregates.
6704
6705 // For a varargs call, named arguments go into VRs or on the stack as
6706 // usual; unnamed arguments always go to the stack or the corresponding
6707 // GPRs when within range. For now, we always put the value in both
6708 // locations (or even all three).
6709 if (CFlags.IsVarArg) {
6710 assert(HasParameterArea &&
6711 "Parameter area must exist if we have a varargs call.");
6712 // We could elide this store in the case where the object fits
6713 // entirely in R registers. Maybe later.
6714 SDValue Store =
6715 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6716 MemOpChains.push_back(Store);
6717 if (VR_idx != NumVRs) {
6718 SDValue Load =
6719 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6720 MemOpChains.push_back(Load.getValue(1));
6721 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6722 }
6723 ArgOffset += 16;
6724 for (unsigned i=0; i<16; i+=PtrByteSize) {
6725 if (GPR_idx == NumGPRs)
6726 break;
6727 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6728 DAG.getConstant(i, dl, PtrVT));
6729 SDValue Load =
6730 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6731 MemOpChains.push_back(Load.getValue(1));
6732 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6733 }
6734 break;
6735 }
6736
6737 // Non-varargs Altivec params go into VRs or on the stack.
6738 if (VR_idx != NumVRs) {
6739 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6740 } else {
6741 if (IsFastCall)
6742 ComputePtrOff();
6743
6744 assert(HasParameterArea &&
6745 "Parameter area must exist to pass an argument in memory.");
6746 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6747 true, CFlags.IsTailCall, true, MemOpChains,
6748 TailCallArguments, dl);
6749 if (IsFastCall)
6750 ArgOffset += 16;
6751 }
6752
6753 if (!IsFastCall)
6754 ArgOffset += 16;
6755 break;
6756 }
6757 }
6758
6759 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6760 "mismatch in size of parameter area");
6761 (void)NumBytesActuallyUsed;
6762
6763 if (!MemOpChains.empty())
6764 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6765
6766 // Check if this is an indirect call (MTCTR/BCTRL).
6767 // See prepareDescriptorIndirectCall and buildCallOperands for more
6768 // information about calls through function pointers in the 64-bit SVR4 ABI.
6769 if (CFlags.IsIndirect) {
6770 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6771 // caller in the TOC save area.
6772 if (isTOCSaveRestoreRequired(Subtarget)) {
6773 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6774 // Load r2 into a virtual register and store it to the TOC save area.
6775 setUsesTOCBasePtr(DAG);
6776 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6777 // TOC save area offset.
6778 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6779 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6780 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6781 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6782 MachinePointerInfo::getStack(
6783 DAG.getMachineFunction(), TOCSaveOffset));
6784 }
6785 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6786 // This does not mean the MTCTR instruction must use R12; it's easier
6787 // to model this as an extra parameter, so do that.
6788 if (isELFv2ABI && !CFlags.IsPatchPoint)
6789 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6790 }
6791
6792 // Build a sequence of copy-to-reg nodes chained together with token chain
6793 // and flag operands which copy the outgoing args into the appropriate regs.
6794 SDValue InGlue;
6795 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6796 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6797 RegsToPass[i].second, InGlue);
6798 InGlue = Chain.getValue(1);
6799 }
6800
6801 if (CFlags.IsTailCall && !IsSibCall)
6802 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6803 TailCallArguments);
6804
6805 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6806 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6807}
6808
6809// Returns true when the shadow of a general purpose argument register
6810// in the parameter save area is aligned to at least 'RequiredAlign'.
6811static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6812 assert(RequiredAlign.value() <= 16 &&
6813 "Required alignment greater than stack alignment.");
6814 switch (Reg) {
6815 default:
6816 report_fatal_error("called on invalid register.");
6817 case PPC::R5:
6818 case PPC::R9:
6819 case PPC::X3:
6820 case PPC::X5:
6821 case PPC::X7:
6822 case PPC::X9:
6823 // These registers are 16-byte aligned, which is the strictest alignment
6824 // we can support.
6825 return true;
6826 case PPC::R3:
6827 case PPC::R7:
6828 case PPC::X4:
6829 case PPC::X6:
6830 case PPC::X8:
6831 case PPC::X10:
6832 // The shadow of these registers in the PSA is 8 byte aligned.
6833 return RequiredAlign <= 8;
6834 case PPC::R4:
6835 case PPC::R6:
6836 case PPC::R8:
6837 case PPC::R10:
6838 return RequiredAlign <= 4;
6839 }
6840}
6841
6842static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6843 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6844 CCState &S) {
6845 AIXCCState &State = static_cast<AIXCCState &>(S);
6846 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6847 State.getMachineFunction().getSubtarget());
6848 const bool IsPPC64 = Subtarget.isPPC64();
6849 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6850 const Align PtrAlign(PtrSize);
6851 const Align StackAlign(16);
6852 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6853
6854 if (ValVT == MVT::f128)
6855 report_fatal_error("f128 is unimplemented on AIX.");
6856
6857 if (ArgFlags.isNest())
6858 report_fatal_error("Nest arguments are unimplemented.");
6859
6860 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6861 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6862 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6863 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6864 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6865 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6866
6867 static const MCPhysReg VR[] = {// Vector registers.
6868 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6869 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6870 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6871
6872 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6873
6874 if (ArgFlags.isByVal()) {
6875 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6876 if (ByValAlign > StackAlign)
6877 report_fatal_error("Pass-by-value arguments with alignment greater than "
6878 "16 are not supported.");
6879
6880 const unsigned ByValSize = ArgFlags.getByValSize();
6881 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6882
6883 // An empty aggregate parameter takes up no storage and no registers,
6884 // but needs a MemLoc for a stack slot for the formal arguments side.
6885 if (ByValSize == 0) {
6886 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6887 State.getStackSize(), RegVT, LocInfo));
6888 return false;
6889 }
6890
6891 // Shadow allocate any registers that are not properly aligned.
6892 unsigned NextReg = State.getFirstUnallocated(GPRs);
6893 while (NextReg != GPRs.size() &&
6894 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6895 // Shadow allocate the next register since its alignment is not strict enough.
6896 unsigned Reg = State.AllocateReg(GPRs);
6897 // Allocate the stack space shadowed by said register.
6898 State.AllocateStack(PtrSize, PtrAlign);
6899 assert(Reg && "Alocating register unexpectedly failed.");
6900 (void)Reg;
6901 NextReg = State.getFirstUnallocated(GPRs);
6902 }
6903
6904 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6905 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6906 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6907 if (unsigned Reg = State.AllocateReg(GPRs))
6908 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6909 else {
6910 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6911 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6912 LocInfo));
6913 break;
6914 }
6915 }
6916 return false;
6917 }
6918
6919 // Arguments always reserve parameter save area.
6920 switch (ValVT.SimpleTy) {
6921 default:
6922 report_fatal_error("Unhandled value type for argument.");
6923 case MVT::i64:
6924 // i64 arguments should have been split to i32 for PPC32.
6925 assert(IsPPC64 && "PPC32 should have split i64 values.");
6926 [[fallthrough]];
6927 case MVT::i1:
6928 case MVT::i32: {
6929 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6930 // AIX integer arguments are always passed in register width.
6931 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6932 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6933 : CCValAssign::LocInfo::ZExt;
6934 if (unsigned Reg = State.AllocateReg(GPRs))
6935 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6936 else
6937 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6938
6939 return false;
6940 }
6941 case MVT::f32:
6942 case MVT::f64: {
6943 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6944 const unsigned StoreSize = LocVT.getStoreSize();
6945 // Floats are always 4-byte aligned in the PSA on AIX.
6946 // This includes f64 in 64-bit mode for ABI compatibility.
6947 const unsigned Offset =
6948 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6949 unsigned FReg = State.AllocateReg(FPR);
6950 if (FReg)
6951 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6952
6953 // Reserve and initialize GPRs or initialize the PSA as required.
6954 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6955 if (unsigned Reg = State.AllocateReg(GPRs)) {
6956 assert(FReg && "An FPR should be available when a GPR is reserved.");
6957 if (State.isVarArg()) {
6958 // Successfully reserved GPRs are only initialized for vararg calls.
6959 // Custom handling is required for:
6960 // f64 in PPC32 needs to be split into 2 GPRs.
6961 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6962 State.addLoc(
6963 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6964 }
6965 } else {
6966 // If there are insufficient GPRs, the PSA needs to be initialized.
6967 // Initialization occurs even if an FPR was initialized for
6968 // compatibility with the AIX XL compiler. The full memory for the
6969 // argument will be initialized even if a prior word is saved in GPR.
6970 // A custom memLoc is used when the argument also passes in FPR so
6971 // that the callee handling can skip over it easily.
6972 State.addLoc(
6973 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6974 LocInfo)
6975 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6976 break;
6977 }
6978 }
6979
6980 return false;
6981 }
6982 case MVT::v4f32:
6983 case MVT::v4i32:
6984 case MVT::v8i16:
6985 case MVT::v16i8:
6986 case MVT::v2i64:
6987 case MVT::v2f64:
6988 case MVT::v1i128: {
6989 const unsigned VecSize = 16;
6990 const Align VecAlign(VecSize);
6991
6992 if (!State.isVarArg()) {
6993 // If there are vector registers remaining we don't consume any stack
6994 // space.
6995 if (unsigned VReg = State.AllocateReg(VR)) {
6996 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6997 return false;
6998 }
6999 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7000 // might be allocated in the portion of the PSA that is shadowed by the
7001 // GPRs.
7002 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7003 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7004 return false;
7005 }
7006
7007 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7008 // Burn any underaligned registers and their shadowed stack space until
7009 // we reach the required alignment.
7010 while (NextRegIndex != GPRs.size() &&
7011 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7012 // Shadow allocate register and its stack shadow.
7013 unsigned Reg = State.AllocateReg(GPRs);
7014 State.AllocateStack(PtrSize, PtrAlign);
7015 assert(Reg && "Allocating register unexpectedly failed.");
7016 (void)Reg;
7017 NextRegIndex = State.getFirstUnallocated(GPRs);
7018 }
7019
7020 // Vectors that are passed as fixed arguments are handled differently.
7021 // They are passed in VRs if any are available (unlike arguments passed
7022 // through the ellipsis) and shadow GPRs (unlike arguments to non-vararg
7023 // functions).
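// For example, a fixed v4i32 that lands in V2 still shadow-allocates 16 bytes
// of the PSA and the corresponding GPRs below, so later arguments keep the
// slots they would have had if the vector had been passed in memory.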
7024 if (State.isFixed(ValNo)) {
7025 if (unsigned VReg = State.AllocateReg(VR)) {
7026 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7027 // Shadow allocate GPRs and stack space even though we pass in a VR.
7028 for (unsigned I = 0; I != VecSize; I += PtrSize)
7029 State.AllocateReg(GPRs);
7030 State.AllocateStack(VecSize, VecAlign);
7031 return false;
7032 }
7033 // No vector registers remain so pass on the stack.
7034 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7035 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7036 return false;
7037 }
7038
7039 // If all GPRs are consumed then we pass the argument fully on the stack.
7040 if (NextRegIndex == GPRs.size()) {
7041 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7042 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7043 return false;
7044 }
7045
7046 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7047 // half of the argument, and then need to pass the remaining half on the
7048 // stack.
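// For example, if R9 is the next free GPR, the first 8 bytes of the vector
// travel in R9 and R10 while the full 16 bytes are also written to the PSA
// slot allocated below, so the callee can reassemble the value from memory.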
7049 if (GPRs[NextRegIndex] == PPC::R9) {
7050 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7051 State.addLoc(
7052 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7053
7054 const unsigned FirstReg = State.AllocateReg(PPC::R9);
7055 const unsigned SecondReg = State.AllocateReg(PPC::R10);
7056 assert(FirstReg && SecondReg &&
7057 "Allocating R9 or R10 unexpectedly failed.");
7058 State.addLoc(
7059 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7060 State.addLoc(
7061 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7062 return false;
7063 }
7064
7065 // We have enough GPRs to fully pass the vector argument, and we have
7066 // already consumed any underaligned registers. Start with the custom
7067 // MemLoc and then the custom RegLocs.
7068 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7069 State.addLoc(
7070 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7071 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7072 const unsigned Reg = State.AllocateReg(GPRs);
7073 assert(Reg && "Failed to allocated register for vararg vector argument");
7074 State.addLoc(
7075 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7076 }
7077 return false;
7078 }
7079 }
7080 return true;
7081}
7082
7083// So far, this function is only used by LowerFormalArguments_AIX()
7084 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7085 bool IsPPC64,
7086 bool HasP8Vector,
7087 bool HasVSX) {
7088 assert((IsPPC64 || SVT != MVT::i64) &&
7089 "i64 should have been split for 32-bit codegen.");
7090
7091 switch (SVT) {
7092 default:
7093 report_fatal_error("Unexpected value type for formal argument");
7094 case MVT::i1:
7095 case MVT::i32:
7096 case MVT::i64:
7097 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7098 case MVT::f32:
7099 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7100 case MVT::f64:
7101 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7102 case MVT::v4f32:
7103 case MVT::v4i32:
7104 case MVT::v8i16:
7105 case MVT::v16i8:
7106 case MVT::v2i64:
7107 case MVT::v2f64:
7108 case MVT::v1i128:
7109 return &PPC::VRRCRegClass;
7110 }
7111}
7112
7113 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7114 SelectionDAG &DAG, SDValue ArgValue,
7115 MVT LocVT, const SDLoc &dl) {
7116 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7117 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7118
7119 if (Flags.isSExt())
7120 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7121 DAG.getValueType(ValVT));
7122 else if (Flags.isZExt())
7123 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7124 DAG.getValueType(ValVT));
7125
7126 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7127}
7128
7129static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7130 const unsigned LASize = FL->getLinkageSize();
7131
7132 if (PPC::GPRCRegClass.contains(Reg)) {
7133 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7134 "Reg must be a valid argument register!");
7135 return LASize + 4 * (Reg - PPC::R3);
7136 }
7137
7138 if (PPC::G8RCRegClass.contains(Reg)) {
7139 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7140 "Reg must be a valid argument register!");
7141 return LASize + 8 * (Reg - PPC::X3);
7142 }
7143
7144 llvm_unreachable("Only general purpose registers expected.");
7145}
7146
7147// AIX ABI Stack Frame Layout:
7148//
7149// Low Memory +--------------------------------------------+
7150// SP +---> | Back chain | ---+
7151// | +--------------------------------------------+ |
7152// | | Saved Condition Register | |
7153// | +--------------------------------------------+ |
7154// | | Saved Linkage Register | |
7155// | +--------------------------------------------+ | Linkage Area
7156// | | Reserved for compilers | |
7157// | +--------------------------------------------+ |
7158// | | Reserved for binders | |
7159// | +--------------------------------------------+ |
7160// | | Saved TOC pointer | ---+
7161// | +--------------------------------------------+
7162// | | Parameter save area |
7163// | +--------------------------------------------+
7164// | | Alloca space |
7165// | +--------------------------------------------+
7166// | | Local variable space |
7167// | +--------------------------------------------+
7168// | | Float/int conversion temporary |
7169// | +--------------------------------------------+
7170// | | Save area for AltiVec registers |
7171// | +--------------------------------------------+
7172// | | AltiVec alignment padding |
7173// | +--------------------------------------------+
7174// | | Save area for VRSAVE register |
7175// | +--------------------------------------------+
7176// | | Save area for General Purpose registers |
7177// | +--------------------------------------------+
7178// | | Save area for Floating Point registers |
7179// | +--------------------------------------------+
7180// +---- | Back chain |
7181// High Memory +--------------------------------------------+
7182//
7183// Specifications:
7184// AIX 7.2 Assembler Language Reference
7185// Subroutine linkage convention
7186
7187SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7188 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7189 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7190 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7191
7192 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7193 CallConv == CallingConv::Fast) &&
7194 "Unexpected calling convention!");
7195
7196 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7197 report_fatal_error("Tail call support is unimplemented on AIX.");
7198
7199 if (useSoftFloat())
7200 report_fatal_error("Soft float support is unimplemented on AIX.");
7201
7202 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7203
7204 const bool IsPPC64 = Subtarget.isPPC64();
7205 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7206
7207 // Assign locations to all of the incoming arguments.
7208 SmallVector<CCValAssign, 16> ArgLocs;
7209 MachineFunction &MF = DAG.getMachineFunction();
7210 MachineFrameInfo &MFI = MF.getFrameInfo();
7211 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7212 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7213
7214 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7215 // Reserve space for the linkage area on the stack.
7216 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7217 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7218 uint64_t SaveStackPos = CCInfo.getStackSize();
7219 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7220 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7221
7222 SmallVector<SDValue, 8> MemOps;
7223
7224 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7225 CCValAssign &VA = ArgLocs[I++];
7226 MVT LocVT = VA.getLocVT();
7227 MVT ValVT = VA.getValVT();
7228 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7229 // For compatibility with the AIX XL compiler, the float args in the
7230 // parameter save area are initialized even if the argument is available
7231 // in register. The caller is required to initialize both the register
7232 // and memory, however, the callee can choose to expect it in either.
7233 // The memloc is dismissed here because the argument is retrieved from
7234 // the register.
7235 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7236 continue;
7237
7238 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7239 const TargetRegisterClass *RegClass = getRegClassForSVT(
7240 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7241 // On PPC64, the debugger assumes extended 8-byte values are stored from a GPR.
7242 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7243 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7244 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7245 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7246 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7247 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7248 MachinePointerInfo(), Align(PtrByteSize));
7249 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7250 MemOps.push_back(StoreReg);
7251 }
7252
7253 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7254 unsigned StoreSize =
7255 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7256 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7257 }
7258
7259 auto HandleMemLoc = [&]() {
7260 const unsigned LocSize = LocVT.getStoreSize();
7261 const unsigned ValSize = ValVT.getStoreSize();
7262 assert((ValSize <= LocSize) &&
7263 "Object size is larger than size of MemLoc");
7264 int CurArgOffset = VA.getLocMemOffset();
7265 // Objects are right-justified because AIX is big-endian.
7266 if (LocSize > ValSize)
7267 CurArgOffset += LocSize - ValSize;
7268 // Potential tail calls could cause overwriting of argument stack slots.
7269 const bool IsImmutable =
7270 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7271 (CallConv == CallingConv::Fast));
7272 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7273 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7274 SDValue ArgValue =
7275 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7276 InVals.push_back(ArgValue);
7277 };
7278
7279 // Vector arguments to VaArg functions are passed both on the stack, and
7280 // in any available GPRs. Load the value from the stack and add the GPRs
7281 // as live ins.
7282 if (VA.isMemLoc() && VA.needsCustom()) {
7283 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7284 assert(isVarArg && "Only use custom memloc for vararg.");
7285 // Record the ValNo of the custom MemLoc so we can compare it to the ValNo
7286 // of the matching custom RegLocs.
7287 const unsigned OriginalValNo = VA.getValNo();
7288 (void)OriginalValNo;
7289
7290 auto HandleCustomVecRegLoc = [&]() {
7291 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7292 "Missing custom RegLoc.");
7293 VA = ArgLocs[I++];
7294 assert(VA.getValVT().isVector() &&
7295 "Unexpected Val type for custom RegLoc.");
7296 assert(VA.getValNo() == OriginalValNo &&
7297 "ValNo mismatch between custom MemLoc and RegLoc.");
7298 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7299 MF.addLiveIn(VA.getLocReg(),
7300 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7301 Subtarget.hasVSX()));
7302 };
7303
7304 HandleMemLoc();
7305 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7306 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7307 // R10.
7308 HandleCustomVecRegLoc();
7309 HandleCustomVecRegLoc();
7310
7311 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7312 // we passed the vector in R5, R6, R7 and R8.
7313 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7314 assert(!IsPPC64 &&
7315 "Only 2 custom RegLocs expected for 64-bit codegen.");
7316 HandleCustomVecRegLoc();
7317 HandleCustomVecRegLoc();
7318 }
7319
7320 continue;
7321 }
7322
7323 if (VA.isRegLoc()) {
7324 if (VA.getValVT().isScalarInteger())
7325 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7326 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7327 switch (VA.getValVT().SimpleTy) {
7328 default:
7329 report_fatal_error("Unhandled value type for argument.");
7330 case MVT::f32:
7331 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7332 break;
7333 case MVT::f64:
7334 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7335 break;
7336 }
7337 } else if (VA.getValVT().isVector()) {
7338 switch (VA.getValVT().SimpleTy) {
7339 default:
7340 report_fatal_error("Unhandled value type for argument.");
7341 case MVT::v16i8:
7342 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7343 break;
7344 case MVT::v8i16:
7345 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7346 break;
7347 case MVT::v4i32:
7348 case MVT::v2i64:
7349 case MVT::v1i128:
7350 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7351 break;
7352 case MVT::v4f32:
7353 case MVT::v2f64:
7354 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7355 break;
7356 }
7357 }
7358 }
7359
7360 if (Flags.isByVal() && VA.isMemLoc()) {
7361 const unsigned Size =
7362 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7363 PtrByteSize);
7364 const int FI = MF.getFrameInfo().CreateFixedObject(
7365 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7366 /* IsAliased */ true);
7367 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7368 InVals.push_back(FIN);
7369
7370 continue;
7371 }
7372
7373 if (Flags.isByVal()) {
7374 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7375
7376 const MCPhysReg ArgReg = VA.getLocReg();
7377 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7378
7379 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7380 const int FI = MF.getFrameInfo().CreateFixedObject(
7381 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7382 /* IsAliased */ true);
7383 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7384 InVals.push_back(FIN);
7385
7386 // Add live ins for all the RegLocs for the same ByVal.
7387 const TargetRegisterClass *RegClass =
7388 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7389
7390 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7391 unsigned Offset) {
7392 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7393 // Since the caller's side has left-justified the aggregate in the
7394 // register, we can simply store the entire register into the stack
7395 // slot.
7396 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7397 // The store to the fixed-stack object is needed because accessing a
7398 // field of the ByVal will use a GEP and load. Ideally we will optimize
7399 // to extract the value from the register directly, and elide the
7400 // stores when the argument's address is not taken, but that will need to
7401 // be future work.
7402 SDValue Store = DAG.getStore(
7403 CopyFrom.getValue(1), dl, CopyFrom,
7404 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7405 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7406
7407 MemOps.push_back(Store);
7408 };
7409
7410 unsigned Offset = 0;
7411 HandleRegLoc(VA.getLocReg(), Offset);
7412 Offset += PtrByteSize;
7413 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7414 Offset += PtrByteSize) {
7415 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7416 "RegLocs should be for ByVal argument.");
7417
7418 const CCValAssign RL = ArgLocs[I++];
7419 HandleRegLoc(RL.getLocReg(), Offset);
7420 }
7421
7422
7423 if (Offset != StackSize) {
7424 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7425 "Expected MemLoc for remaining bytes.");
7426 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7427 // Consume the MemLoc. The InVal has already been emitted, so nothing
7428 // more needs to be done.
7429 ++I;
7430 }
7431
7432 continue;
7433 }
7434
7435 if (VA.isRegLoc() && !VA.needsCustom()) {
7436 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7437 Register VReg =
7438 MF.addLiveIn(VA.getLocReg(),
7439 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7440 Subtarget.hasVSX()));
7441 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7442 if (ValVT.isScalarInteger() &&
7443 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7444 ArgValue =
7445 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7446 }
7447 InVals.push_back(ArgValue);
7448 continue;
7449 }
7450 if (VA.isMemLoc()) {
7451 HandleMemLoc();
7452 continue;
7453 }
7454 }
7455
7456 // On AIX a minimum of 8 words is saved to the parameter save area.
7457 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7458 // Area that is at least reserved in the caller of this function.
7459 unsigned CallerReservedArea = std::max<unsigned>(
7460 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7461
7462 // Set the size that is at least reserved in caller of this function. Tail
7463 // call optimized function's reserved stack space needs to be aligned so
7464 // that taking the difference between two stack areas will result in an
7465 // aligned stack.
7466 CallerReservedArea =
7467 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7468 FuncInfo->setMinReservedArea(CallerReservedArea);
7469
7470 if (isVarArg) {
7471 FuncInfo->setVarArgsFrameIndex(
7472 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7473 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7474
7475 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7476 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7477
7478 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7479 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7480 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7481
7482 // The fixed integer arguments of a variadic function are stored to the
7483 // VarArgsFrameIndex on the stack so that they may be loaded by
7484 // dereferencing the result of va_next.
7485 for (unsigned GPRIndex =
7486 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7487 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7488
7489 const Register VReg =
7490 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7491 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7492
7493 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7494 SDValue Store =
7495 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7496 MemOps.push_back(Store);
7497 // Increment the address for the next argument to store.
7498 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7499 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7500 }
7501 }
7502
7503 if (!MemOps.empty())
7504 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7505
7506 return Chain;
7507}
7508
7509SDValue PPCTargetLowering::LowerCall_AIX(
7510 SDValue Chain, SDValue Callee, CallFlags CFlags,
7511 const SmallVectorImpl<ISD::OutputArg> &Outs,
7512 const SmallVectorImpl<SDValue> &OutVals,
7513 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7514 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7515 const CallBase *CB) const {
7516 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7517 // AIX ABI stack frame layout.
7518
7519 assert((CFlags.CallConv == CallingConv::C ||
7520 CFlags.CallConv == CallingConv::Cold ||
7521 CFlags.CallConv == CallingConv::Fast) &&
7522 "Unexpected calling convention!");
7523
7524 if (CFlags.IsPatchPoint)
7525 report_fatal_error("This call type is unimplemented on AIX.");
7526
7527 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7528
7529 MachineFunction &MF = DAG.getMachineFunction();
7530 SmallVector<CCValAssign, 16> ArgLocs;
7531 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7532 *DAG.getContext());
7533
7534 // Reserve space for the linkage save area (LSA) on the stack.
7535 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7536 // [SP][CR][LR][2 x reserved][TOC].
7537 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7538 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7539 const bool IsPPC64 = Subtarget.isPPC64();
7540 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7541 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7542 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7543 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7544
7545 // The prolog code of the callee may store up to 8 GPR argument registers to
7546 // the stack, allowing va_start to index over them in memory if the callee
7547 // is variadic.
7548 // Because we cannot tell if this is needed on the caller side, we have to
7549 // conservatively assume that it is needed. As such, make sure we have at
7550 // least enough stack space for the caller to store the 8 GPRs.
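// For example, on 64-bit AIX this floor works out to 48 bytes of linkage area
// plus 8 * 8 = 64 bytes of parameter save area, i.e. NumBytes is at least 112.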
7551 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7552 const unsigned NumBytes = std::max<unsigned>(
7553 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7554
7555 // Adjust the stack pointer for the new arguments...
7556 // These operations are automatically eliminated by the prolog/epilog pass.
7557 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7558 SDValue CallSeqStart = Chain;
7559
7560 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7561 SmallVector<SDValue, 8> MemOpChains;
7562
7563 // Set up a copy of the stack pointer for loading and storing any
7564 // arguments that may not fit in the registers available for argument
7565 // passing.
7566 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7567 : DAG.getRegister(PPC::R1, MVT::i32);
7568
7569 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7570 const unsigned ValNo = ArgLocs[I].getValNo();
7571 SDValue Arg = OutVals[ValNo];
7572 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7573
7574 if (Flags.isByVal()) {
7575 const unsigned ByValSize = Flags.getByValSize();
7576
7577 // Nothing to do for zero-sized ByVals on the caller side.
7578 if (!ByValSize) {
7579 ++I;
7580 continue;
7581 }
7582
7583 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7584 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7585 (LoadOffset != 0)
7586 ? DAG.getObjectPtrOffset(
7587 dl, Arg, TypeSize::getFixed(LoadOffset))
7588 : Arg,
7589 MachinePointerInfo(), VT);
7590 };
7591
7592 unsigned LoadOffset = 0;
7593
7594 // Initialize registers, which are fully occupied by the by-val argument.
7595 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7596 SDValue Load = GetLoad(PtrVT, LoadOffset);
7597 MemOpChains.push_back(Load.getValue(1));
7598 LoadOffset += PtrByteSize;
7599 const CCValAssign &ByValVA = ArgLocs[I++];
7600 assert(ByValVA.getValNo() == ValNo &&
7601 "Unexpected location for pass-by-value argument.");
7602 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7603 }
7604
7605 if (LoadOffset == ByValSize)
7606 continue;
7607
7608 // There must be one more loc to handle the remainder.
7609 assert(ArgLocs[I].getValNo() == ValNo &&
7610 "Expected additional location for by-value argument.");
7611
7612 if (ArgLocs[I].isMemLoc()) {
7613 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7614 const CCValAssign &ByValVA = ArgLocs[I++];
7615 ISD::ArgFlagsTy MemcpyFlags = Flags;
7616 // Only memcpy the bytes that don't pass in register.
7617 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7618 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7619 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7620 dl, Arg, TypeSize::getFixed(LoadOffset))
7621 : Arg,
7622 DAG.getObjectPtrOffset(
7623 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7624 CallSeqStart, MemcpyFlags, DAG, dl);
7625 continue;
7626 }
7627
7628 // Initialize the final register residue.
7629 // Any residue that occupies the final by-val arg register must be
7630 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7631 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7632 // 2 and 1 byte loads.
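// Continuing that example on a 64-bit target: the i32, i16 and i8 loads are
// taken from offsets 0, 4 and 6 and shifted left by 32, 16 and 8 bits
// respectively before being OR'ed together, leaving the 7 bytes
// left-justified in the register.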
7633 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7634 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7635 "Unexpected register residue for by-value argument.");
7636 SDValue ResidueVal;
7637 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7638 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7639 const MVT VT =
7640 N == 1 ? MVT::i8
7641 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7642 SDValue Load = GetLoad(VT, LoadOffset);
7643 MemOpChains.push_back(Load.getValue(1));
7644 LoadOffset += N;
7645 Bytes += N;
7646
7647 // By-val arguments are passed left-justified in the register.
7648 // Every load here needs to be shifted, otherwise a full register load
7649 // should have been used.
7650 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7651 "Unexpected load emitted during handling of pass-by-value "
7652 "argument.");
7653 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7654 EVT ShiftAmountTy =
7655 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7656 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7657 SDValue ShiftedLoad =
7658 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7659 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7660 ShiftedLoad)
7661 : ShiftedLoad;
7662 }
7663
7664 const CCValAssign &ByValVA = ArgLocs[I++];
7665 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7666 continue;
7667 }
7668
7669 CCValAssign &VA = ArgLocs[I++];
7670 const MVT LocVT = VA.getLocVT();
7671 const MVT ValVT = VA.getValVT();
7672
7673 switch (VA.getLocInfo()) {
7674 default:
7675 report_fatal_error("Unexpected argument extension type.");
7676 case CCValAssign::Full:
7677 break;
7678 case CCValAssign::ZExt:
7679 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7680 break;
7681 case CCValAssign::SExt:
7682 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7683 break;
7684 }
7685
7686 if (VA.isRegLoc() && !VA.needsCustom()) {
7687 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7688 continue;
7689 }
7690
7691 // Vector arguments passed to VarArg functions need custom handling when
7692 // they are passed (at least partially) in GPRs.
7693 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7694 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7695 // Store value to its stack slot.
7696 SDValue PtrOff =
7697 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7698 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7699 SDValue Store =
7700 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7701 MemOpChains.push_back(Store);
7702 const unsigned OriginalValNo = VA.getValNo();
7703 // Then load the GPRs from the stack
7704 unsigned LoadOffset = 0;
7705 auto HandleCustomVecRegLoc = [&]() {
7706 assert(I != E && "Unexpected end of CCvalAssigns.");
7707 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7708 "Expected custom RegLoc.");
7709 CCValAssign RegVA = ArgLocs[I++];
7710 assert(RegVA.getValNo() == OriginalValNo &&
7711 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7712 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7713 DAG.getConstant(LoadOffset, dl, PtrVT));
7714 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7715 MemOpChains.push_back(Load.getValue(1));
7716 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7717 LoadOffset += PtrByteSize;
7718 };
7719
7720 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7721 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7722 // R10.
7723 HandleCustomVecRegLoc();
7724 HandleCustomVecRegLoc();
7725
7726 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7727 ArgLocs[I].getValNo() == OriginalValNo) {
7728 assert(!IsPPC64 &&
7729 "Only 2 custom RegLocs expected for 64-bit codegen.");
7730 HandleCustomVecRegLoc();
7731 HandleCustomVecRegLoc();
7732 }
7733
7734 continue;
7735 }
7736
7737 if (VA.isMemLoc()) {
7738 SDValue PtrOff =
7739 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7740 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7741 MemOpChains.push_back(
7742 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7743
7744 continue;
7745 }
7746
7747 if (!ValVT.isFloatingPoint())
7749 "Unexpected register handling for calling convention.");
7750
7751 // Custom handling is used for GPR initializations for vararg float
7752 // arguments.
7753 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7754 LocVT.isInteger() &&
7755 "Custom register handling only expected for VarArg.");
7756
7757 SDValue ArgAsInt =
7758 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7759
7760 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7761 // f32 in 32-bit GPR
7762 // f64 in 64-bit GPR
7763 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7764 else if (Arg.getValueType().getFixedSizeInBits() <
7765 LocVT.getFixedSizeInBits())
7766 // f32 in 64-bit GPR.
7767 RegsToPass.push_back(std::make_pair(
7768 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7769 else {
7770 // f64 in two 32-bit GPRs
7771 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7772 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7773 "Unexpected custom register for argument!");
7774 CCValAssign &GPR1 = VA;
7775 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7776 DAG.getConstant(32, dl, MVT::i8));
7777 RegsToPass.push_back(std::make_pair(
7778 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7779
7780 if (I != E) {
7781 // If only 1 GPR was available, there will only be one custom GPR and
7782 // the argument will also pass in memory.
7783 CCValAssign &PeekArg = ArgLocs[I];
7784 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7785 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7786 CCValAssign &GPR2 = ArgLocs[I++];
7787 RegsToPass.push_back(std::make_pair(
7788 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7789 }
7790 }
7791 }
7792 }
7793
7794 if (!MemOpChains.empty())
7795 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7796
7797 // For indirect calls, we need to save the TOC base to the stack for
7798 // restoration after the call.
7799 if (CFlags.IsIndirect) {
7800 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7801 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7802 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7803 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7804 const unsigned TOCSaveOffset =
7805 Subtarget.getFrameLowering()->getTOCSaveOffset();
7806
7807 setUsesTOCBasePtr(DAG);
7808 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7809 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7810 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7811 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7812 Chain = DAG.getStore(
7813 Val.getValue(1), dl, Val, AddPtr,
7814 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7815 }
7816
7817 // Build a sequence of copy-to-reg nodes chained together with token chain
7818 // and flag operands which copy the outgoing args into the appropriate regs.
7819 SDValue InGlue;
7820 for (auto Reg : RegsToPass) {
7821 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7822 InGlue = Chain.getValue(1);
7823 }
7824
7825 const int SPDiff = 0;
7826 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7827 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7828}
7829
7830bool
7831PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7832 MachineFunction &MF, bool isVarArg,
7833 const SmallVectorImpl<ISD::OutputArg> &Outs,
7834 LLVMContext &Context) const {
7835 SmallVector<CCValAssign, 16> RVLocs;
7836 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7837 return CCInfo.CheckReturn(
7838 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7839 ? RetCC_PPC_Cold
7840 : RetCC_PPC);
7841}
7842
7843SDValue
7844PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7845 bool isVarArg,
7846 const SmallVectorImpl<ISD::OutputArg> &Outs,
7847 const SmallVectorImpl<SDValue> &OutVals,
7848 const SDLoc &dl, SelectionDAG &DAG) const {
7849 SmallVector<CCValAssign, 16> RVLocs;
7850 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7851 *DAG.getContext());
7852 CCInfo.AnalyzeReturn(Outs,
7853 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7854 ? RetCC_PPC_Cold
7855 : RetCC_PPC);
7856
7857 SDValue Glue;
7858 SmallVector<SDValue, 4> RetOps(1, Chain);
7859
7860 // Copy the result values into the output registers.
7861 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7862 CCValAssign &VA = RVLocs[i];
7863 assert(VA.isRegLoc() && "Can only return in registers!");
7864
7865 SDValue Arg = OutVals[RealResIdx];
7866
7867 switch (VA.getLocInfo()) {
7868 default: llvm_unreachable("Unknown loc info!");
7869 case CCValAssign::Full: break;
7870 case CCValAssign::AExt:
7871 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7872 break;
7873 case CCValAssign::ZExt:
7874 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7875 break;
7876 case CCValAssign::SExt:
7877 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7878 break;
7879 }
7880 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7881 bool isLittleEndian = Subtarget.isLittleEndian();
7882 // Legalize ret f64 -> ret 2 x i32.
7883 SDValue SVal =
7884 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7885 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7886 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7887 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7888 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7889 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7890 Glue = Chain.getValue(1);
7891 VA = RVLocs[++i]; // skip ahead to next loc
7892 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7893 } else
7894 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7895 Glue = Chain.getValue(1);
7896 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7897 }
7898
7899 RetOps[0] = Chain; // Update chain.
7900
7901 // Add the glue if we have it.
7902 if (Glue.getNode())
7903 RetOps.push_back(Glue);
7904
7905 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7906}
7907
7908SDValue
7909PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7910 SelectionDAG &DAG) const {
7911 SDLoc dl(Op);
7912
7913 // Get the correct type for integers.
7914 EVT IntVT = Op.getValueType();
7915
7916 // Get the inputs.
7917 SDValue Chain = Op.getOperand(0);
7918 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7919 // Build a DYNAREAOFFSET node.
7920 SDValue Ops[2] = {Chain, FPSIdx};
7921 SDVTList VTs = DAG.getVTList(IntVT);
7922 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7923}
7924
7925SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7926 SelectionDAG &DAG) const {
7927 // When we pop the dynamic allocation we need to restore the SP link.
7928 SDLoc dl(Op);
7929
7930 // Get the correct type for pointers.
7931 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7932
7933 // Construct the stack pointer operand.
7934 bool isPPC64 = Subtarget.isPPC64();
7935 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7936 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7937
7938 // Get the operands for the STACKRESTORE.
7939 SDValue Chain = Op.getOperand(0);
7940 SDValue SaveSP = Op.getOperand(1);
7941
7942 // Load the old link SP.
7943 SDValue LoadLinkSP =
7944 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7945
7946 // Restore the stack pointer.
7947 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7948
7949 // Store the old link SP.
7950 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7951}
7952
7953SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7954 MachineFunction &MF = DAG.getMachineFunction();
7955 bool isPPC64 = Subtarget.isPPC64();
7956 EVT PtrVT = getPointerTy(MF.getDataLayout());
7957
7958 // Get the current return address save index; it is created on first use
7959 // below if it has not been defined yet.
7960 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7961 int RASI = FI->getReturnAddrSaveIndex();
7962
7963 // If the return address save index hasn't been defined yet.
7964 if (!RASI) {
7965 // Find out the fixed offset of the return address save area.
7966 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7967 // Allocate the frame index for the return address save area.
7968 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7969 // Save the result.
7970 FI->setReturnAddrSaveIndex(RASI);
7971 }
7972 return DAG.getFrameIndex(RASI, PtrVT);
7973}
7974
7975SDValue
7976PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7977 MachineFunction &MF = DAG.getMachineFunction();
7978 bool isPPC64 = Subtarget.isPPC64();
7979 EVT PtrVT = getPointerTy(MF.getDataLayout());
7980
7981 // Get the current frame pointer save index. The users of this index will
7982 // be primarily DYNALLOC instructions.
7983 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7984 int FPSI = FI->getFramePointerSaveIndex();
7985
7986 // If the frame pointer save index hasn't been defined yet.
7987 if (!FPSI) {
7988 // Find out the fixed offset of the frame pointer save area.
7989 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7990 // Allocate the frame index for the frame pointer save area.
7991 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7992 // Save the result.
7993 FI->setFramePointerSaveIndex(FPSI);
7994 }
7995 return DAG.getFrameIndex(FPSI, PtrVT);
7996}
7997
7998SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7999 SelectionDAG &DAG) const {
8000 MachineFunction &MF = DAG.getMachineFunction();
8001 // Get the inputs.
8002 SDValue Chain = Op.getOperand(0);
8003 SDValue Size = Op.getOperand(1);
8004 SDLoc dl(Op);
8005
8006 // Get the correct type for pointers.
8007 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8008 // Negate the size.
8009 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8010 DAG.getConstant(0, dl, PtrVT), Size);
8011 // Construct a node for the frame pointer save index.
8012 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8013 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8014 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8015 if (hasInlineStackProbe(MF))
8016 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8017 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8018}
8019
8020SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8021 SelectionDAG &DAG) const {
8022 MachineFunction &MF = DAG.getMachineFunction();
8023
8024 bool isPPC64 = Subtarget.isPPC64();
8025 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8026
8027 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8028 return DAG.getFrameIndex(FI, PtrVT);
8029}
8030
8031SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8032 SelectionDAG &DAG) const {
8033 SDLoc DL(Op);
8034 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8035 DAG.getVTList(MVT::i32, MVT::Other),
8036 Op.getOperand(0), Op.getOperand(1));
8037}
8038
8039SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8040 SelectionDAG &DAG) const {
8041 SDLoc DL(Op);
8042 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8043 Op.getOperand(0), Op.getOperand(1));
8044}
8045
8046SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8047 if (Op.getValueType().isVector())
8048 return LowerVectorLoad(Op, DAG);
8049
8050 assert(Op.getValueType() == MVT::i1 &&
8051 "Custom lowering only for i1 loads");
8052
8053 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8054
8055 SDLoc dl(Op);
8056 LoadSDNode *LD = cast<LoadSDNode>(Op);
8057
8058 SDValue Chain = LD->getChain();
8059 SDValue BasePtr = LD->getBasePtr();
8060 MachineMemOperand *MMO = LD->getMemOperand();
8061
8062 SDValue NewLD =
8063 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8064 BasePtr, MVT::i8, MMO);
8065 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8066
8067 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8068 return DAG.getMergeValues(Ops, dl);
8069}
8070
8071SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8072 if (Op.getOperand(1).getValueType().isVector())
8073 return LowerVectorStore(Op, DAG);
8074
8075 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8076 "Custom lowering only for i1 stores");
8077
8078 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8079
8080 SDLoc dl(Op);
8081 StoreSDNode *ST = cast<StoreSDNode>(Op);
8082
8083 SDValue Chain = ST->getChain();
8084 SDValue BasePtr = ST->getBasePtr();
8085 SDValue Value = ST->getValue();
8086 MachineMemOperand *MMO = ST->getMemOperand();
8087
8088 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8089 Value);
8090 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8091}
8092
8093// FIXME: Remove this once the ANDI glue bug is fixed:
8094SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8095 assert(Op.getValueType() == MVT::i1 &&
8096 "Custom lowering only for i1 results");
8097
8098 SDLoc DL(Op);
8099 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8100}
8101
8102SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8103 SelectionDAG &DAG) const {
8104
8105 // Implements a vector truncate that fits in a vector register as a shuffle.
8106 // We want to legalize vector truncates down to where the source fits in
8107 // a vector register (and target is therefore smaller than vector register
8108 // size). At that point legalization will try to custom lower the sub-legal
8109 // result and get here - where we can contain the truncate as a single target
8110 // operation.
8111
8112 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8113 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8114 //
8115 // We will implement it for big-endian ordering as this (where u denotes
8116 // undefined):
8117 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8118 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8119 //
8120 // The same operation in little-endian ordering will be:
8121 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8122 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8123
8124 EVT TrgVT = Op.getValueType();
8125 assert(TrgVT.isVector() && "Vector type expected.");
8126 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8127 EVT EltVT = TrgVT.getVectorElementType();
8128 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8129 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8130 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8131 return SDValue();
8132
8133 SDValue N1 = Op.getOperand(0);
8134 EVT SrcVT = N1.getValueType();
8135 unsigned SrcSize = SrcVT.getSizeInBits();
8136 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8137 !llvm::has_single_bit<uint32_t>(
8138 SrcVT.getVectorElementType().getSizeInBits()))
8139 return SDValue();
8140 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8141 return SDValue();
8142
8143 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8144 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8145
8146 SDLoc DL(Op);
8147 SDValue Op1, Op2;
8148 if (SrcSize == 256) {
8149 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8150 EVT SplitVT =
8151 N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
8152 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8153 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8154 DAG.getConstant(0, DL, VecIdxTy));
8155 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8156 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8157 }
8158 else {
8159 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8160 Op2 = DAG.getUNDEF(WideVT);
8161 }
8162
8163 // First list the elements we want to keep.
8164 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8165 SmallVector<int, 16> ShuffV;
8166 if (Subtarget.isLittleEndian())
8167 for (unsigned i = 0; i < TrgNumElts; ++i)
8168 ShuffV.push_back(i * SizeMult);
8169 else
8170 for (unsigned i = 1; i <= TrgNumElts; ++i)
8171 ShuffV.push_back(i * SizeMult - 1);
8172
8173 // Populate the remaining elements with undefs.
8174 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8175 // ShuffV.push_back(i + WideNumElts);
8176 ShuffV.push_back(WideNumElts + 1);
8177
8178 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8179 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8180 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8181}
8182
8183 /// LowerSELECT_CC - Lower floating point select_cc's into the fsel instruction when
8184/// possible.
8185SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8186 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8187 EVT ResVT = Op.getValueType();
8188 EVT CmpVT = Op.getOperand(0).getValueType();
8189 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8190 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8191 SDLoc dl(Op);
8192
8193 // Without power9-vector, we don't have a native instruction for f128 comparison.
8194 // The following transformation to a libcall is needed for setcc:
8195 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8196 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8197 SDValue Z = DAG.getSetCC(
8198 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8199 LHS, RHS, CC);
8200 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8201 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8202 }
8203
8204 // Not FP, or using SPE? Not a fsel.
8205 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8206 Subtarget.hasSPE())
8207 return Op;
8208
8209 SDNodeFlags Flags = Op.getNode()->getFlags();
8210
8211 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8212 // presence of infinities.
8213 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8214 switch (CC) {
8215 default:
8216 break;
8217 case ISD::SETOGT:
8218 case ISD::SETGT:
8219 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8220 case ISD::SETOLT:
8221 case ISD::SETLT:
8222 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8223 }
8224 }
8225
8226 // We might be able to do better than this under some circumstances, but in
8227 // general, fsel-based lowering of select is a finite-math-only optimization.
8228 // For more information, see section F.3 of the 2.06 ISA specification.
8229 // With ISA 3.0
8230 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8231 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8232 ResVT == MVT::f128)
8233 return Op;
8234
8235 // If the RHS of the comparison is a 0.0, we don't need to do the
8236 // subtraction at all.
8237 SDValue Sel1;
8238 if (isFloatingPointZero(RHS))
8239 switch (CC) {
8240 default: break; // SETUO etc aren't handled by fsel.
8241 case ISD::SETNE:
8242 std::swap(TV, FV);
8243 [[fallthrough]];
8244 case ISD::SETEQ:
8245 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8246 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8247 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8248 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8249 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8250 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8251 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8252 case ISD::SETULT:
8253 case ISD::SETLT:
8254 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8255 [[fallthrough]];
8256 case ISD::SETOGE:
8257 case ISD::SETGE:
8258 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8259 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8260 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8261 case ISD::SETUGT:
8262 case ISD::SETGT:
8263 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8264 [[fallthrough]];
8265 case ISD::SETOLE:
8266 case ISD::SETLE:
8267 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8268 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8269 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8270 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8271 }
8272
8273 SDValue Cmp;
8274 switch (CC) {
8275 default: break; // SETUO etc aren't handled by fsel.
8276 case ISD::SETNE:
8277 std::swap(TV, FV);
8278 [[fallthrough]];
8279 case ISD::SETEQ:
8280 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8281 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8282 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8283 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8284 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8285 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8286 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8287 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8288 case ISD::SETULT:
8289 case ISD::SETLT:
8290 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8291 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8292 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8293 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8294 case ISD::SETOGE:
8295 case ISD::SETGE:
8296 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8297 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8298 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8299 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8300 case ISD::SETUGT:
8301 case ISD::SETGT:
8302 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8303 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8304 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8305 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8306 case ISD::SETOLE:
8307 case ISD::SETLE:
8308 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8309 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8310 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8311 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8312 }
8313 return Op;
8314}
8315
8316static unsigned getPPCStrictOpcode(unsigned Opc) {
8317 switch (Opc) {
8318 default:
8319 llvm_unreachable("No strict version of this opcode!");
8320 case PPCISD::FCTIDZ:
8321 return PPCISD::STRICT_FCTIDZ;
8322 case PPCISD::FCTIWZ:
8323 return PPCISD::STRICT_FCTIWZ;
8324 case PPCISD::FCTIDUZ:
8325 return PPCISD::STRICT_FCTIDUZ;
8326 case PPCISD::FCTIWUZ:
8327 return PPCISD::STRICT_FCTIWUZ;
8328 case PPCISD::FCFID:
8329 return PPCISD::STRICT_FCFID;
8330 case PPCISD::FCFIDU:
8331 return PPCISD::STRICT_FCFIDU;
8332 case PPCISD::FCFIDS:
8333 return PPCISD::STRICT_FCFIDS;
8334 case PPCISD::FCFIDUS:
8335 return PPCISD::STRICT_FCFIDUS;
8336 }
8337}
8338
8339 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8340 const PPCSubtarget &Subtarget) {
8341 SDLoc dl(Op);
8342 bool IsStrict = Op->isStrictFPOpcode();
8343 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8344 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8345
8346 // TODO: Any other flags to propagate?
8347 SDNodeFlags Flags;
8348 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8349
8350 // For strict nodes, source is the second operand.
8351 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8352 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8353 MVT DestTy = Op.getSimpleValueType();
8354 assert(Src.getValueType().isFloatingPoint() &&
8355 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8356 DestTy == MVT::i64) &&
8357 "Invalid FP_TO_INT types");
8358 if (Src.getValueType() == MVT::f32) {
8359 if (IsStrict) {
8360 Src =
8361 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8362 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8363 Chain = Src.getValue(1);
8364 } else
8365 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8366 }
8367 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8368 DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
8369 unsigned Opc = ISD::DELETED_NODE;
8370 switch (DestTy.SimpleTy) {
8371 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8372 case MVT::i32:
8373 Opc = IsSigned ? PPCISD::FCTIWZ
8374 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8375 break;
8376 case MVT::i64:
8377 assert((IsSigned || Subtarget.hasFPCVT()) &&
8378 "i64 FP_TO_UINT is supported only with FPCVT");
8379 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8380 }
8381 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8382 SDValue Conv;
8383 if (IsStrict) {
8384 Opc = getPPCStrictOpcode(Opc);
8385 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8386 Flags);
8387 } else {
8388 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8389 }
8390 return Conv;
8391}
8392
8393void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8394 SelectionDAG &DAG,
8395 const SDLoc &dl) const {
8396 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8397 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8398 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8399 bool IsStrict = Op->isStrictFPOpcode();
8400
8401 // Convert the FP value to an int value through memory.
8402 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8403 (IsSigned || Subtarget.hasFPCVT());
8404 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8405 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8406 MachinePointerInfo MPI =
8407 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8408
8409 // Emit a store to the stack slot.
8410 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8411 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8412 if (i32Stack) {
8413 MachineFunction &MF = DAG.getMachineFunction();
8414 Alignment = Align(4);
8415 MachineMemOperand *MMO =
8416 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8417 SDValue Ops[] = { Chain, Tmp, FIPtr };
8418 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8419 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8420 } else
8421 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8422
8423 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8424 // add in a bias on big endian.
8425 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8426 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8427 DAG.getConstant(4, dl, FIPtr.getValueType()));
8428 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8429 }
8430
8431 RLI.Chain = Chain;
8432 RLI.Ptr = FIPtr;
8433 RLI.MPI = MPI;
8434 RLI.Alignment = Alignment;
8435}
8436
8437/// Custom lowers floating point to integer conversions to use
8438/// the direct move instructions available in ISA 2.07 to avoid the
8439/// need for load/store combinations.
8440SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8441 SelectionDAG &DAG,
8442 const SDLoc &dl) const {
8443 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8444 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8445 if (Op->isStrictFPOpcode())
8446 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8447 else
8448 return Mov;
8449}
8450
8451SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8452 const SDLoc &dl) const {
8453 bool IsStrict = Op->isStrictFPOpcode();
8454 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8455 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8456 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8457 EVT SrcVT = Src.getValueType();
8458 EVT DstVT = Op.getValueType();
8459
8460 // FP to INT conversions are legal for f128.
8461 if (SrcVT == MVT::f128)
8462 return Subtarget.hasP9Vector() ? Op : SDValue();
8463
8464 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8465 // PPC (the libcall is not available).
8466 if (SrcVT == MVT::ppcf128) {
8467 if (DstVT == MVT::i32) {
8468 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8469 // set other fast-math flags to FP operations in both strict and
8470 // non-strict cases. (FP_TO_SINT, FSUB)
8471 SDNodeFlags Flags;
8472 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8473
8474 if (IsSigned) {
8475 SDValue Lo, Hi;
8476 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8477
8478 // Add the two halves of the long double in round-to-zero mode, and use
8479 // a smaller FP_TO_SINT.
8480 if (IsStrict) {
8481 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8482 DAG.getVTList(MVT::f64, MVT::Other),
8483 {Op.getOperand(0), Lo, Hi}, Flags);
8484 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8485 DAG.getVTList(MVT::i32, MVT::Other),
8486 {Res.getValue(1), Res}, Flags);
8487 } else {
8488 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8489 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8490 }
8491 } else {
8492 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8493 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8494 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8495 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8496 if (IsStrict) {
8497 // Sel = Src < 0x80000000
8498 // FltOfs = select Sel, 0.0, 0x80000000
8499 // IntOfs = select Sel, 0, 0x80000000
8500 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8501 SDValue Chain = Op.getOperand(0);
8502 EVT SetCCVT =
8503 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8504 EVT DstSetCCVT =
8505 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8506 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8507 Chain, true);
8508 Chain = Sel.getValue(1);
8509
8510 SDValue FltOfs = DAG.getSelect(
8511 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8512 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8513
8514 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8515 DAG.getVTList(SrcVT, MVT::Other),
8516 {Chain, Src, FltOfs}, Flags);
8517 Chain = Val.getValue(1);
8518 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8519 DAG.getVTList(DstVT, MVT::Other),
8520 {Chain, Val}, Flags);
8521 Chain = SInt.getValue(1);
8522 SDValue IntOfs = DAG.getSelect(
8523 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8524 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8525 return DAG.getMergeValues({Result, Chain}, dl);
8526 } else {
8527 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8528 // FIXME: generated code sucks.
8529 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8530 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8531 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8532 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8533 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8534 }
8535 }
8536 }
8537
8538 return SDValue();
8539 }
8540
8541 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8542 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8543
8544 ReuseLoadInfo RLI;
8545 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8546
8547 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8548 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8549}
8550
8551// We're trying to insert a regular store, S, and then a load, L. If the
8552// incoming value, O, is a load, we might just be able to have our load use the
8553// address used by O. However, we don't know if anything else will store to
8554// that address before we can load from it. To prevent this situation, we need
8555// to insert our load, L, into the chain as a peer of O. To do this, we give L
8556// the same chain operand as O, we create a token factor from the chain results
8557// of O and L, and we replace all uses of O's chain result with that token
8558// factor (see spliceIntoChain below for this last part).
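// Illustrative DAG shape for the above: if O produces (val, ch) and the new
// load L is given O's chain operand, then a TokenFactor(ch(O), ch(L)) is
// built and every former user of ch(O) is redirected to it, so no store to
// the reused address can be scheduled between O and L.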
8559bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8560 ReuseLoadInfo &RLI,
8561 SelectionDAG &DAG,
8562 ISD::LoadExtType ET) const {
8563 // Conservatively skip reusing for constrained FP nodes.
8564 if (Op->isStrictFPOpcode())
8565 return false;
8566
8567 SDLoc dl(Op);
8568 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8569 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8570 if (ET == ISD::NON_EXTLOAD &&
8571 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8572 isOperationLegalOrCustom(Op.getOpcode(),
8573 Op.getOperand(0).getValueType())) {
8574
8575 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8576 return true;
8577 }
8578
8579 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8580 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8581 LD->isNonTemporal())
8582 return false;
8583 if (LD->getMemoryVT() != MemVT)
8584 return false;
8585
8586 // If the result of the load is an illegal type, then we can't build a
8587 // valid chain for reuse since the legalised loads and token factor node that
8588 // ties the legalised loads together uses a different output chain then the
8589 // illegal load.
8590 if (!isTypeLegal(LD->getValueType(0)))
8591 return false;
8592
8593 RLI.Ptr = LD->getBasePtr();
8594 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8595 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8596 "Non-pre-inc AM on PPC?");
8597 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8598 LD->getOffset());
8599 }
8600
8601 RLI.Chain = LD->getChain();
8602 RLI.MPI = LD->getPointerInfo();
8603 RLI.IsDereferenceable = LD->isDereferenceable();
8604 RLI.IsInvariant = LD->isInvariant();
8605 RLI.Alignment = LD->getAlign();
8606 RLI.AAInfo = LD->getAAInfo();
8607 RLI.Ranges = LD->getRanges();
8608
8609 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8610 return true;
8611}
8612
8613// Given the head of the old chain, ResChain, insert a token factor containing
8614// it and NewResChain, and make users of ResChain now be users of that token
8615// factor.
8616// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8617void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8618 SDValue NewResChain,
8619 SelectionDAG &DAG) const {
8620 if (!ResChain)
8621 return;
8622
8623 SDLoc dl(NewResChain);
8624
8625 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8626 NewResChain, DAG.getUNDEF(MVT::Other));
8627 assert(TF.getNode() != NewResChain.getNode() &&
8628 "A new TF really is required here");
8629
8630 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8631 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8632}
8633
8634/// Analyze profitability of direct move
8635/// prefer float load to int load plus direct move
8636/// when there is no integer use of int load
8637bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8638 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8639 if (Origin->getOpcode() != ISD::LOAD)
8640 return true;
8641
8642 // If there is no LXSIBZX/LXSIHZX, like Power8,
8643 // prefer direct move if the memory size is 1 or 2 bytes.
8644 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8645 if (!Subtarget.hasP9Vector() &&
8646 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8647 return true;
8648
8649 for (SDNode::use_iterator UI = Origin->use_begin(),
8650 UE = Origin->use_end();
8651 UI != UE; ++UI) {
8652
8653 // Only look at the users of the loaded value.
8654 if (UI.getUse().get().getResNo() != 0)
8655 continue;
8656
8657 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8658 UI->getOpcode() != ISD::UINT_TO_FP &&
8659 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8660 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8661 return true;
8662 }
8663
8664 return false;
8665}
8666
8667 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8668 const PPCSubtarget &Subtarget,
8669 SDValue Chain = SDValue()) {
8670 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8671 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8672 SDLoc dl(Op);
8673
8674 // TODO: Any other flags to propagate?
8675 SDNodeFlags Flags;
8676 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8677
8678 // If we have FCFIDS, then use it when converting to single-precision.
8679 // Otherwise, convert to double-precision and then round.
8680 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8681 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8682 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8683 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8684 if (Op->isStrictFPOpcode()) {
8685 if (!Chain)
8686 Chain = Op.getOperand(0);
8687 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8688 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8689 } else
8690 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8691}
8692
8693/// Custom lowers integer to floating point conversions to use
8694/// the direct move instructions available in ISA 2.07 to avoid the
8695/// need for load/store combinations.
8696SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8697 SelectionDAG &DAG,
8698 const SDLoc &dl) const {
8699 assert((Op.getValueType() == MVT::f32 ||
8700 Op.getValueType() == MVT::f64) &&
8701 "Invalid floating point type as target of conversion");
8702 assert(Subtarget.hasFPCVT() &&
8703 "Int to FP conversions with direct moves require FPCVT");
8704 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8705 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8706 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8707 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8708 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8709 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8710 return convertIntToFP(Op, Mov, DAG, Subtarget);
8711}
8712
8713static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8714
8715 EVT VecVT = Vec.getValueType();
8716 assert(VecVT.isVector() && "Expected a vector type.");
8717 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8718
8719 EVT EltVT = VecVT.getVectorElementType();
8720 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8721 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8722
8723 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8724 SmallVector<SDValue, 16> Ops(NumConcat);
8725 Ops[0] = Vec;
8726 SDValue UndefVec = DAG.getUNDEF(VecVT);
8727 for (unsigned i = 1; i < NumConcat; ++i)
8728 Ops[i] = UndefVec;
8729
8730 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8731}
8732
8733SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8734 const SDLoc &dl) const {
8735 bool IsStrict = Op->isStrictFPOpcode();
8736 unsigned Opc = Op.getOpcode();
8737 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8738 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8740 "Unexpected conversion type");
8741 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8742 "Supports conversions to v2f64/v4f32 only.");
8743
8744 // TODO: Any other flags to propagate?
8745 SDNodeFlags Flags;
8746 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8747
8748 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8749 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8750
8751 SDValue Wide = widenVec(DAG, Src, dl);
8752 EVT WideVT = Wide.getValueType();
8753 unsigned WideNumElts = WideVT.getVectorNumElements();
8754 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8755
8756 SmallVector<int, 16> ShuffV;
8757 for (unsigned i = 0; i < WideNumElts; ++i)
8758 ShuffV.push_back(i + WideNumElts);
8759
8760 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8761 int SaveElts = FourEltRes ? 4 : 2;
8762 if (Subtarget.isLittleEndian())
8763 for (int i = 0; i < SaveElts; i++)
8764 ShuffV[i * Stride] = i;
8765 else
8766 for (int i = 1; i <= SaveElts; i++)
8767 ShuffV[i * Stride - 1] = i - 1;
8768
8769 SDValue ShuffleSrc2 =
8770 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8771 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8772
8773 SDValue Extend;
8774 if (SignedConv) {
8775 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8776 EVT ExtVT = Src.getValueType();
8777 if (Subtarget.hasP9Altivec())
8778 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8779 IntermediateVT.getVectorNumElements());
8780
8781 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8782 DAG.getValueType(ExtVT));
8783 } else
8784 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8785
8786 if (IsStrict)
8787 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8788 {Op.getOperand(0), Extend}, Flags);
8789
8790 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8791}
8792
8793SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8794 SelectionDAG &DAG) const {
8795 SDLoc dl(Op);
8796 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8797 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8798 bool IsStrict = Op->isStrictFPOpcode();
8799 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8800 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8801
8802 // TODO: Any other flags to propagate?
8803 SDNodeFlags Flags;
8804 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8805
8806 EVT InVT = Src.getValueType();
8807 EVT OutVT = Op.getValueType();
8808 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8809 isOperationCustom(Op.getOpcode(), InVT))
8810 return LowerINT_TO_FPVector(Op, DAG, dl);
8811
8812 // Conversions to f128 are legal.
8813 if (Op.getValueType() == MVT::f128)
8814 return Subtarget.hasP9Vector() ? Op : SDValue();
8815
8816 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8817 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8818 return SDValue();
8819
8820 if (Src.getValueType() == MVT::i1) {
8821 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8822 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8823 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8824 if (IsStrict)
8825 return DAG.getMergeValues({Sel, Chain}, dl);
8826 else
8827 return Sel;
8828 }
8829
8830 // If we have direct moves, we can do all the conversion, skip the store/load
8831 // however, without FPCVT we can't do most conversions.
8832 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8833 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8834 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8835
8836 assert((IsSigned || Subtarget.hasFPCVT()) &&
8837 "UINT_TO_FP is supported only with FPCVT");
8838
8839 if (Src.getValueType() == MVT::i64) {
8840 SDValue SINT = Src;
8841 // When converting to single-precision, we actually need to convert
8842 // to double-precision first and then round to single-precision.
8843 // To avoid double-rounding effects during that operation, we have
8844 // to prepare the input operand. Bits that might be truncated when
8845 // converting to double-precision are replaced by a bit that won't
8846 // be lost at this stage, but is below the single-precision rounding
8847 // position.
8848 //
8849 // However, if -enable-unsafe-fp-math is in effect, accept double
8850 // rounding to avoid the extra overhead.
8851 if (Op.getValueType() == MVT::f32 &&
8852 !Subtarget.hasFPCVT() &&
8853 !DAG.getTarget().Options.UnsafeFPMath) {
8854
8855 // Twiddle input to make sure the low 11 bits are zero. (If this
8856 // is the case, we are guaranteed the value will fit into the 53 bit
8857 // mantissa of an IEEE double-precision value without rounding.)
8858 // If any of those low 11 bits were not zero originally, make sure
8859 // bit 12 (value 2048) is set instead, so that the final rounding
8860 // to single-precision gets the correct result.
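// For illustration: with SINT = 0x100005 the low 11 bits (5) are nonzero, so
// the sequence below produces 0x100800 (low 11 bits cleared, the 2048 bit
// set), which converts exactly to double precision and rounds to the same
// single-precision value as the original input.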
8861 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8862 SINT, DAG.getConstant(2047, dl, MVT::i64));
8863 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8864 Round, DAG.getConstant(2047, dl, MVT::i64));
8865 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8866 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8867 Round, DAG.getConstant(-2048, dl, MVT::i64));
8868
8869 // However, we cannot use that value unconditionally: if the magnitude
8870 // of the input value is small, the bit-twiddling we did above might
8871 // end up visibly changing the output. Fortunately, in that case, we
8872 // don't need to twiddle bits since the original input will convert
8873 // exactly to double-precision floating-point already. Therefore,
8874 // construct a conditional to use the original value if the top 11
8875 // bits are all sign-bit copies, and use the rounded value computed
8876 // above otherwise.
8877 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8878 SINT, DAG.getConstant(53, dl, MVT::i32));
8879 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8880 Cond, DAG.getConstant(1, dl, MVT::i64));
8881 Cond = DAG.getSetCC(
8882 dl,
8883 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8884 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8885
8886 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8887 }
8888
8889 ReuseLoadInfo RLI;
8890 SDValue Bits;
8892 MachineFunction &MF = DAG.getMachineFunction();
8893 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8894 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8895 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8896 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8897 } else if (Subtarget.hasLFIWAX() &&
8898 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8899 MachineMemOperand *MMO =
8900 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8901 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8902 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8903 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8904 DAG.getVTList(MVT::f64, MVT::Other),
8905 Ops, MVT::i32, MMO);
8906 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8907 } else if (Subtarget.hasFPCVT() &&
8908 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8909 MachineMemOperand *MMO =
8910 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8911 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8912 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8913 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8914 DAG.getVTList(MVT::f64, MVT::Other),
8915 Ops, MVT::i32, MMO);
8916 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8917 } else if (((Subtarget.hasLFIWAX() &&
8918 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8919 (Subtarget.hasFPCVT() &&
8920 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8921 SINT.getOperand(0).getValueType() == MVT::i32) {
8922 MachineFrameInfo &MFI = MF.getFrameInfo();
8923 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8924
8925 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8926 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8927
8928 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8929 MachinePointerInfo::getFixedStack(
8930 DAG.getMachineFunction(), FrameIdx));
8931 Chain = Store;
8932
8933 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8934 "Expected an i32 store");
8935
8936 RLI.Ptr = FIdx;
8937 RLI.Chain = Chain;
8938 RLI.MPI =
8939 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8940 RLI.Alignment = Align(4);
8941
8942 MachineMemOperand *MMO =
8943 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8944 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8945 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8946 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8947 PPCISD::LFIWZX : PPCISD::LFIWAX,
8948 dl, DAG.getVTList(MVT::f64, MVT::Other),
8949 Ops, MVT::i32, MMO);
8950 Chain = Bits.getValue(1);
8951 } else
8952 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8953
8954 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8955 if (IsStrict)
8956 Chain = FP.getValue(1);
8957
8958 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8959 if (IsStrict)
8960 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8961 DAG.getVTList(MVT::f32, MVT::Other),
8962 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8963 else
8964 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8965 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8966 }
8967 return FP;
8968 }
8969
8970 assert(Src.getValueType() == MVT::i32 &&
8971 "Unhandled INT_TO_FP type in custom expander!");
8972 // Since we only generate this in 64-bit mode, we can take advantage of
8973 // 64-bit registers. In particular, sign extend the input value into the
8974 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8975 // then lfd it and fcfid it.
8976 MachineFunction &MF = DAG.getMachineFunction();
8977 MachineFrameInfo &MFI = MF.getFrameInfo();
8978 EVT PtrVT = getPointerTy(MF.getDataLayout());
8979
8980 SDValue Ld;
8981 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8982 ReuseLoadInfo RLI;
8983 bool ReusingLoad;
8984 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8985 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8986 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8987
8988 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8989 MachinePointerInfo::getFixedStack(
8990 DAG.getMachineFunction(), FrameIdx));
8991 Chain = Store;
8992
8993 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8994 "Expected an i32 store");
8995
8996 RLI.Ptr = FIdx;
8997 RLI.Chain = Chain;
8998 RLI.MPI =
8999 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9000 RLI.Alignment = Align(4);
9001 }
9002
9003 MachineMemOperand *MMO =
9004 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9005 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9006 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9007 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9008 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9009 MVT::i32, MMO);
9010 Chain = Ld.getValue(1);
9011 if (ReusingLoad)
9012 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
9013 } else {
9014 assert(Subtarget.isPPC64() &&
9015 "i32->FP without LFIWAX supported only on PPC64");
9016
9017 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9018 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9019
9020 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9021
9022 // STD the extended value into the stack slot.
9023 SDValue Store = DAG.getStore(
9024 Chain, dl, Ext64, FIdx,
9025 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9026 Chain = Store;
9027
9028 // Load the value as a double.
9029 Ld = DAG.getLoad(
9030 MVT::f64, dl, Chain, FIdx,
9031 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9032 Chain = Ld.getValue(1);
9033 }
9034
9035 // FCFID it and return it.
9036 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9037 if (IsStrict)
9038 Chain = FP.getValue(1);
9039 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9040 if (IsStrict)
9041 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
9042 DAG.getVTList(MVT::f32, MVT::Other),
9043 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
9044 else
9045 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9046 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9047 }
9048 return FP;
9049}
9050
9051SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9052 SelectionDAG &DAG) const {
9053 SDLoc dl(Op);
9054 /*
9055 The rounding mode is in bits 30:31 of FPSR, and has the following
9056 settings:
9057 00 Round to nearest
9058 01 Round to 0
9059 10 Round to +inf
9060 11 Round to -inf
9061
9062 GET_ROUNDING, on the other hand, expects the following:
9063 -1 Undefined
9064 0 Round to 0
9065 1 Round to nearest
9066 2 Round to +inf
9067 3 Round to -inf
9068
9069 To perform the conversion, we do:
9070 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9071 */
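// For example, FPSCR RN = 0b00 (round to nearest) gives 0 ^ ((3 & 3) >> 1) = 1,
// and RN = 0b10 (round to +inf) gives 2 ^ ((1 & 3) >> 1) = 2, matching the
// GET_ROUNDING encoding listed above.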
9072
9073 MachineFunction &MF = DAG.getMachineFunction();
9074 EVT VT = Op.getValueType();
9075 EVT PtrVT = getPointerTy(MF.getDataLayout());
9076
9077 // Save FP Control Word to register
9078 SDValue Chain = Op.getOperand(0);
9079 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9080 Chain = MFFS.getValue(1);
9081
9082 SDValue CWD;
9083 if (isTypeLegal(MVT::i64)) {
9084 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9085 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9086 } else {
9087 // Save FP register to stack slot
9088 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9089 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9090 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9091
9092 // Load FP Control Word from low 32 bits of stack slot.
9094 "Stack slot adjustment is valid only on big endian subtargets!");
9095 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9096 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9097 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9098 Chain = CWD.getValue(1);
9099 }
9100
9101 // Transform as necessary
9102 SDValue CWD1 =
9103 DAG.getNode(ISD::AND, dl, MVT::i32,
9104 CWD, DAG.getConstant(3, dl, MVT::i32));
9105 SDValue CWD2 =
9106 DAG.getNode(ISD::SRL, dl, MVT::i32,
9107 DAG.getNode(ISD::AND, dl, MVT::i32,
9108 DAG.getNode(ISD::XOR, dl, MVT::i32,
9109 CWD, DAG.getConstant(3, dl, MVT::i32)),
9110 DAG.getConstant(3, dl, MVT::i32)),
9111 DAG.getConstant(1, dl, MVT::i32));
9112
9113 SDValue RetVal =
9114 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9115
9116 RetVal =
9117 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9118 dl, VT, RetVal);
9119
9120 return DAG.getMergeValues({RetVal, Chain}, dl);
9121}
9122
9123SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9124 EVT VT = Op.getValueType();
9125 unsigned BitWidth = VT.getSizeInBits();
9126 SDLoc dl(Op);
9127 assert(Op.getNumOperands() == 3 &&
9128 VT == Op.getOperand(1).getValueType() &&
9129 "Unexpected SHL!");
9130
9131 // Expand into a bunch of logical ops. Note that these ops
9132 // depend on the PPC behavior for oversized shift amounts.
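// In effect, OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth)).
// Because the PPC shift nodes interpret the amount modulo 2*BitWidth and
// produce zero once it reaches BitWidth, at most one of the Lo terms is
// nonzero for any Amt, so no select is needed here.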
9133 SDValue Lo = Op.getOperand(0);
9134 SDValue Hi = Op.getOperand(1);
9135 SDValue Amt = Op.getOperand(2);
9136 EVT AmtVT = Amt.getValueType();
9137
9138 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9139 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9140 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9141 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9142 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9143 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9144 DAG.getConstant(-BitWidth, dl, AmtVT));
9145 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9146 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9147 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9148 SDValue OutOps[] = { OutLo, OutHi };
9149 return DAG.getMergeValues(OutOps, dl);
9150}
9151
9152SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9153 EVT VT = Op.getValueType();
9154 SDLoc dl(Op);
9155 unsigned BitWidth = VT.getSizeInBits();
9156 assert(Op.getNumOperands() == 3 &&
9157 VT == Op.getOperand(1).getValueType() &&
9158 "Unexpected SRL!");
9159
9160 // Expand into a bunch of logical ops. Note that these ops
9161 // depend on the PPC behavior for oversized shift amounts.
9162 SDValue Lo = Op.getOperand(0);
9163 SDValue Hi = Op.getOperand(1);
9164 SDValue Amt = Op.getOperand(2);
9165 EVT AmtVT = Amt.getValueType();
9166
9167 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9168 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9169 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9170 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9171 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9172 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9173 DAG.getConstant(-BitWidth, dl, AmtVT));
9174 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9175 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9176 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9177 SDValue OutOps[] = { OutLo, OutHi };
9178 return DAG.getMergeValues(OutOps, dl);
9179}
9180
9181SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9182 SDLoc dl(Op);
9183 EVT VT = Op.getValueType();
9184 unsigned BitWidth = VT.getSizeInBits();
9185 assert(Op.getNumOperands() == 3 &&
9186 VT == Op.getOperand(1).getValueType() &&
9187 "Unexpected SRA!");
9188
9189 // Expand into a bunch of logical ops, followed by a select_cc.
9190 SDValue Lo = Op.getOperand(0);
9191 SDValue Hi = Op.getOperand(1);
9192 SDValue Amt = Op.getOperand(2);
9193 EVT AmtVT = Amt.getValueType();
9194
9195 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9196 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9197 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9198 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9199 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9200 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9201 DAG.getConstant(-BitWidth, dl, AmtVT));
9202 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9203 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9204 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9205 Tmp4, Tmp6, ISD::SETLE);
9206 SDValue OutOps[] = { OutLo, OutHi };
9207 return DAG.getMergeValues(OutOps, dl);
9208}
9209
9210SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9211 SelectionDAG &DAG) const {
9212 SDLoc dl(Op);
9213 EVT VT = Op.getValueType();
9214 unsigned BitWidth = VT.getSizeInBits();
9215
9216 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9217 SDValue X = Op.getOperand(0);
9218 SDValue Y = Op.getOperand(1);
9219 SDValue Z = Op.getOperand(2);
9220 EVT AmtVT = Z.getValueType();
9221
9222 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9223 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9224 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9225 // on PowerPC shift by BW being well defined.
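// For example, a 64-bit fshl with Z = 40 computes (X << 40) | (Y >> 24); when
// Z % BW == 0 the unused side is shifted by BW and becomes zero, so the OR
// reduces to X for fshl and Y for fshr.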
9226 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9227 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9228 SDValue SubZ =
9229 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9230 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9231 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9232 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9233}
9234
9235//===----------------------------------------------------------------------===//
9236// Vector related lowering.
9237//
9238
9239/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9240/// element size of SplatSize. Cast the result to VT.
9241static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9242 SelectionDAG &DAG, const SDLoc &dl) {
9243 static const MVT VTys[] = { // canonical VT to use for each size.
9244 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9245 };
9246
9247 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9248
9249 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9250 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9251 SplatSize = 1;
9252 Val = 0xFF;
9253 }
9254
9255 EVT CanonicalVT = VTys[SplatSize-1];
9256
9257 // Build a canonical splat for this value.
9258 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9259}
9260
9261/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9262/// specified intrinsic ID.
9263 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9264 const SDLoc &dl, EVT DestVT = MVT::Other) {
9265 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9266 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9267 DAG.getConstant(IID, dl, MVT::i32), Op);
9268}
9269
9270/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9271/// specified intrinsic ID.
9272static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9273 SelectionDAG &DAG, const SDLoc &dl,
9274 EVT DestVT = MVT::Other) {
9275 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9276 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9277 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9278}
9279
9280/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9281/// specified intrinsic ID.
9282static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9283 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9284 EVT DestVT = MVT::Other) {
9285 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9286 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9287 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9288}
9289
9290/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9291/// amount. The result has the specified value type.
9292static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9293 SelectionDAG &DAG, const SDLoc &dl) {
9294 // Force LHS/RHS to be the right type.
9295 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9296 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9297
9298 int Ops[16];
9299 for (unsigned i = 0; i != 16; ++i)
9300 Ops[i] = i + Amt;
9301 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9302 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9303}
9304
9305/// Do we have an efficient pattern in a .td file for this node?
9306///
9307/// \param V - pointer to the BuildVectorSDNode being matched
9308/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9309///
9310/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9311/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9312/// the opposite is true (expansion is beneficial) are:
9313/// - The node builds a vector out of integers that are not 32 or 64-bits
9314/// - The node builds a vector out of constants
9315/// - The node is a "load-and-splat"
9316/// In all other cases, we will choose to keep the BUILD_VECTOR.
9317 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9318 bool HasDirectMove,
9319 bool HasP8Vector) {
9320 EVT VecVT = V->getValueType(0);
9321 bool RightType = VecVT == MVT::v2f64 ||
9322 (HasP8Vector && VecVT == MVT::v4f32) ||
9323 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9324 if (!RightType)
9325 return false;
9326
9327 bool IsSplat = true;
9328 bool IsLoad = false;
9329 SDValue Op0 = V->getOperand(0);
9330
9331 // This function is called in a block that confirms the node is not a constant
9332 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9333 // different constants.
9334 if (V->isConstant())
9335 return false;
9336 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9337 if (V->getOperand(i).isUndef())
9338 return false;
9339 // We want to expand nodes that represent load-and-splat even if the
9340 // loaded value is a floating point truncation or conversion to int.
9341 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9342 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9343 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9344 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9345 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9346 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9347 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9348 IsLoad = true;
9349 // If the operands are different or the input is not a load and has more
9350 // uses than just this BV node, then it isn't a splat.
9351 if (V->getOperand(i) != Op0 ||
9352 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9353 IsSplat = false;
9354 }
9355 return !(IsSplat && IsLoad);
9356}
9357
9358// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9359SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9360
9361 SDLoc dl(Op);
9362 SDValue Op0 = Op->getOperand(0);
9363
9364 if ((Op.getValueType() != MVT::f128) ||
9365 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9366 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9367 (Op0.getOperand(1).getValueType() != MVT::i64) || !Subtarget.isPPC64())
9368 return SDValue();
9369
9370 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9371 Op0.getOperand(1));
9372}
9373
9374static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9375 const SDValue *InputLoad = &Op;
9376 while (InputLoad->getOpcode() == ISD::BITCAST)
9377 InputLoad = &InputLoad->getOperand(0);
9378 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9379 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9380 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9381 InputLoad = &InputLoad->getOperand(0);
9382 }
9383 if (InputLoad->getOpcode() != ISD::LOAD)
9384 return nullptr;
9385 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9386 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9387}
9388
9389// Convert the argument APFloat to a single precision APFloat if there is no
9390// loss in information during the conversion to single precision APFloat and the
9391// resulting number is not a denormal number. Return true if successful.
9392 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9393 APFloat APFloatToConvert = ArgAPFloat;
9394 bool LosesInfo = true;
9395 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9396 &LosesInfo);
9397 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9398 if (Success)
9399 ArgAPFloat = APFloatToConvert;
9400 return Success;
9401}
9402
9403// Bitcast the argument APInt to a double and convert it to a single precision
9404// APFloat, bitcast the APFloat to an APInt and assign it to the original
9405// argument if there is no loss in information during the conversion from
9406// double to single precision APFloat and the resulting number is not a denormal
9407// number. Return true if successful.
9408 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9409 double DpValue = ArgAPInt.bitsToDouble();
9410 APFloat APFloatDp(DpValue);
9411 bool Success = convertToNonDenormSingle(APFloatDp);
9412 if (Success)
9413 ArgAPInt = APFloatDp.bitcastToAPInt();
9414 return Success;
9415}
9416
9417 // Nondestructive check for convertToNonDenormSingle.
9418 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9419 // Only convert if it loses info, since XXSPLTIDP should
9420 // handle the other case.
9421 APFloat APFloatToConvert = ArgAPFloat;
9422 bool LosesInfo = true;
9423 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9424 &LosesInfo);
9425
9426 return (!LosesInfo && !APFloatToConvert.isDenormal());
9427}
9428
9429static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9430 unsigned &Opcode) {
9431 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9432 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9433 return false;
9434
9435 EVT Ty = Op->getValueType(0);
9436 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9437 // as we cannot handle extending loads for these types.
9438 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9439 ISD::isNON_EXTLoad(InputNode))
9440 return true;
9441
9442 EVT MemVT = InputNode->getMemoryVT();
9443 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9444 // memory VT is the same vector element VT type.
9445 // The loads feeding into the v8i16 and v16i8 types will be extending because
9446 // scalar i8/i16 are not legal types.
9447 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9448 (MemVT == Ty.getVectorElementType()))
9449 return true;
9450
9451 if (Ty == MVT::v2i64) {
9452 // Check the extend type, when the input type is i32, and the output vector
9453 // type is v2i64.
9454 if (MemVT == MVT::i32) {
9455 if (ISD::isZEXTLoad(InputNode))
9456 Opcode = PPCISD::ZEXT_LD_SPLAT;
9457 if (ISD::isSEXTLoad(InputNode))
9458 Opcode = PPCISD::SEXT_LD_SPLAT;
9459 }
9460 return true;
9461 }
9462 return false;
9463}
9464
9465// If this is a case we can't handle, return null and let the default
9466// expansion code take care of it. If we CAN select this case, and if it
9467// selects to a single instruction, return Op. Otherwise, if we can codegen
9468// this case more efficiently than a constant pool load, lower it to the
9469// sequence of ops that should be used.
9470SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9471 SelectionDAG &DAG) const {
9472 SDLoc dl(Op);
9473 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9474 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9475
9476 // Check if this is a splat of a constant value.
9477 APInt APSplatBits, APSplatUndef;
9478 unsigned SplatBitSize;
9479 bool HasAnyUndefs;
9480 bool BVNIsConstantSplat =
9481 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9482 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9483
9484 // If it is a splat of a double, check if we can shrink it to a 32 bit
9485 // non-denormal float which when converted back to double gives us the same
9486 // double. This is to exploit the XXSPLTIDP instruction.
9487 // If we lose precision, we use XXSPLTI32DX.
9488 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9489 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9490 // Check the type first to short-circuit so we don't modify APSplatBits if
9491 // this block isn't executed.
9492 if ((Op->getValueType(0) == MVT::v2f64) &&
9493 convertToNonDenormSingle(APSplatBits)) {
9494 SDValue SplatNode = DAG.getNode(
9495 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9496 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9497 return DAG.getBitcast(Op.getValueType(), SplatNode);
9498 } else {
9499 // We may lose precision, so we have to use XXSPLTI32DX.
9500
9501 uint32_t Hi =
9502 (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9503 uint32_t Lo =
9504 (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9505 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9506
9507 if (!Hi || !Lo)
9508 // If either load is 0, then we should generate XXLXOR to set to 0.
9509 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9510
9511 if (Hi)
9512 SplatNode = DAG.getNode(
9513 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9514 DAG.getTargetConstant(0, dl, MVT::i32),
9515 DAG.getTargetConstant(Hi, dl, MVT::i32));
9516
9517 if (Lo)
9518 SplatNode =
9519 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9520 DAG.getTargetConstant(1, dl, MVT::i32),
9521 DAG.getTargetConstant(Lo, dl, MVT::i32));
9522
9523 return DAG.getBitcast(Op.getValueType(), SplatNode);
9524 }
9525 }
9526
9527 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9528 unsigned NewOpcode = PPCISD::LD_SPLAT;
9529
9530 // Handle load-and-splat patterns as we have instructions that will do this
9531 // in one go.
9532 if (DAG.isSplatValue(Op, true) &&
9533 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9534 const SDValue *InputLoad = &Op.getOperand(0);
9535 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9536
9537 // If the input load is an extending load, it will be an i32 -> i64
9538 // extending load and isValidSplatLoad() will update NewOpcode.
9539 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9540 unsigned ElementSize =
9541 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9542
9543 assert(((ElementSize == 2 * MemorySize)
9544 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9545 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9546 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9547 "Unmatched element size and opcode!\n");
9548
9549 // Checking for a single use of this load, we have to check for vector
9550 // width (128 bits) / ElementSize uses (since each operand of the
9551 // BUILD_VECTOR is a separate use of the value).
9552 unsigned NumUsesOfInputLD = 128 / ElementSize;
9553 for (SDValue BVInOp : Op->ops())
9554 if (BVInOp.isUndef())
9555 NumUsesOfInputLD--;
9556
9557 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9558 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9559 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9560 // 15", but function IsValidSplatLoad() now will only return true when
9561 // the data at index 0 is not nullptr. So we will not get into trouble for
9562 // these cases.
9563 //
9564 // case 1 - lfiwzx/lfiwax
9565 // 1.1: load result is i32 and is sign/zero extend to i64;
9566 // 1.2: build a v2i64 vector type with above loaded value;
9567 // 1.3: the vector has only one value at index 0, others are all undef;
9568 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9569 if (NumUsesOfInputLD == 1 &&
9570 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9571 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9572 Subtarget.hasLFIWAX()))
9573 return SDValue();
9574
9575 // case 2 - lxvr[hb]x
9576 // 2.1: load result is at most i16;
9577 // 2.2: build a vector with above loaded value;
9578 // 2.3: the vector has only one value at index 0, others are all undef;
9579 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9580 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9581 Subtarget.isISA3_1() && ElementSize <= 16)
9582 return SDValue();
9583
9584 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9585 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9586 Subtarget.hasVSX()) {
9587 SDValue Ops[] = {
9588 LD->getChain(), // Chain
9589 LD->getBasePtr(), // Ptr
9590 DAG.getValueType(Op.getValueType()) // VT
9591 };
9592 SDValue LdSplt = DAG.getMemIntrinsicNode(
9593 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9594 LD->getMemoryVT(), LD->getMemOperand());
9595 // Replace all uses of the output chain of the original load with the
9596 // output chain of the new load.
9597 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9598 LdSplt.getValue(1));
9599 return LdSplt;
9600 }
9601 }
9602
9603 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9604 // 32-bits can be lowered to VSX instructions under certain conditions.
9605 // Without VSX, there is no pattern more efficient than expanding the node.
9606 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9607 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9608 Subtarget.hasP8Vector()))
9609 return Op;
9610 return SDValue();
9611 }
9612
9613 uint64_t SplatBits = APSplatBits.getZExtValue();
9614 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9615 unsigned SplatSize = SplatBitSize / 8;
9616
9617 // First, handle single instruction cases.
9618
9619 // All zeros?
9620 if (SplatBits == 0) {
9621 // Canonicalize all zero vectors to be v4i32.
9622 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9623 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9624 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9625 }
9626 return Op;
9627 }
9628
9629 // We have XXSPLTIW for constant splats four bytes wide.
9630 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9631 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9632 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9633 // turned into a 4-byte splat of 0xABABABAB.
9634 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9635 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9636 Op.getValueType(), DAG, dl);
9637
9638 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9639 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9640 dl);
9641
9642 // We have XXSPLTIB for constant splats one byte wide.
9643 if (Subtarget.hasP9Vector() && SplatSize == 1)
9644 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9645 dl);
9646
9647 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9648 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9649 (32-SplatBitSize));
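// For example, an 8-bit splat of 0xF0 yields (int32_t)(0xF0 << 24) >> 24 == -16, which
// can be materialized with a single vspltisb -16.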
9650 if (SextVal >= -16 && SextVal <= 15)
9651 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9652 dl);
9653
9654 // Two instruction sequences.
9655
9656 // If this value is in the range [-32,30] and is even, use:
9657 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9658 // If this value is in the range [17,31] and is odd, use:
9659 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9660 // If this value is in the range [-31,-17] and is odd, use:
9661 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9662 // Note the last two are three-instruction sequences.
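// For example, SextVal == 30 expands to VSPLTI[bhw](15) + VSPLTI[bhw](15), and
// SextVal == 27 expands to VSPLTI[bhw](11) - VSPLTI[bhw](-16), since 11 - (-16) == 27.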
9663 if (SextVal >= -32 && SextVal <= 31) {
9664 // To avoid having these optimizations undone by constant folding,
9665 // we convert to a pseudo that will be expanded later into one of
9666 // the above forms.
9667 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9668 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9669 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9670 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9671 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9672 if (VT == Op.getValueType())
9673 return RetVal;
9674 else
9675 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9676 }
9677
9678 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9679 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9680 // for fneg/fabs.
9681 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9682 // Make -1 and vspltisw -1:
9683 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9684
9685 // Make the VSLW intrinsic, computing 0x8000_0000.
9686 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9687 OnesV, DAG, dl);
9688
9689 // xor by OnesV to invert it.
9690 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9691 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9692 }
9693
9694 // Check to see if this is a wide variety of vsplti*, binop self cases.
9695 static const signed char SplatCsts[] = {
9696 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9697 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9698 };
9699
9700 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9701 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9702 // cases which are ambiguous (e.g. formation of 0x8000_0000); 'vsplti -1'
9703 // is listed first in SplatCsts, so it wins such ties.
9703 int i = SplatCsts[idx];
9704
9705 // Figure out what shift amount will be used by altivec if shifted by i in
9706 // this splat size.
9707 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9708
9709 // vsplti + shl self.
9710 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9711 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9712 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9713 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9714 Intrinsic::ppc_altivec_vslw
9715 };
9716 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9717 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9718 }
9719
9720 // vsplti + srl self.
9721 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9722 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9723 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9724 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9725 Intrinsic::ppc_altivec_vsrw
9726 };
9727 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9728 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9729 }
9730
9731 // vsplti + rol self.
9732 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9733 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9734 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9735 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9736 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9737 Intrinsic::ppc_altivec_vrlw
9738 };
9739 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9740 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9741 }
9742
9743 // t = vsplti c, result = vsldoi t, t, 1
9744 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9745 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9746 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9747 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9748 }
9749 // t = vsplti c, result = vsldoi t, t, 2
9750 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9751 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9752 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9753 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9754 }
9755 // t = vsplti c, result = vsldoi t, t, 3
9756 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9757 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9758 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9759 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9760 }
9761 }
9762
9763 return SDValue();
9764}
9765
9766/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9767/// the specified operations to build the shuffle.
9768static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9769 SDValue RHS, SelectionDAG &DAG,
9770 const SDLoc &dl) {
9771 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9772 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9773 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
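// PFEntry layout: bits 30-31 hold the cost, bits 26-29 the opcode (see the enum below),
// and each 13-bit operand field encodes four base-9 digits, where 0-7 select a source
// word and 8 means undef.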
9774
9775 enum {
9776 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9777 OP_VMRGHW,
9778 OP_VMRGLW,
9779 OP_VSPLTISW0,
9780 OP_VSPLTISW1,
9781 OP_VSPLTISW2,
9782 OP_VSPLTISW3,
9783 OP_VSLDOI4,
9784 OP_VSLDOI8,
9785 OP_VSLDOI12
9786 };
9787
9788 if (OpNum == OP_COPY) {
9789 if (LHSID == (1*9+2)*9+3) return LHS;
9790 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9791 return RHS;
9792 }
9793
9794 SDValue OpLHS, OpRHS;
9795 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9796 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9797
9798 int ShufIdxs[16];
9799 switch (OpNum) {
9800 default: llvm_unreachable("Unknown i32 permute!");
9801 case OP_VMRGHW:
9802 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9803 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9804 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9805 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9806 break;
9807 case OP_VMRGLW:
9808 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9809 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9810 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9811 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9812 break;
9813 case OP_VSPLTISW0:
9814 for (unsigned i = 0; i != 16; ++i)
9815 ShufIdxs[i] = (i&3)+0;
9816 break;
9817 case OP_VSPLTISW1:
9818 for (unsigned i = 0; i != 16; ++i)
9819 ShufIdxs[i] = (i&3)+4;
9820 break;
9821 case OP_VSPLTISW2:
9822 for (unsigned i = 0; i != 16; ++i)
9823 ShufIdxs[i] = (i&3)+8;
9824 break;
9825 case OP_VSPLTISW3:
9826 for (unsigned i = 0; i != 16; ++i)
9827 ShufIdxs[i] = (i&3)+12;
9828 break;
9829 case OP_VSLDOI4:
9830 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9831 case OP_VSLDOI8:
9832 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9833 case OP_VSLDOI12:
9834 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9835 }
9836 EVT VT = OpLHS.getValueType();
9837 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9838 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9839 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9840 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9841}
9842
9843/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9844/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9845/// SDValue.
9846SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9847 SelectionDAG &DAG) const {
9848 const unsigned BytesInVector = 16;
9849 bool IsLE = Subtarget.isLittleEndian();
9850 SDLoc dl(N);
9851 SDValue V1 = N->getOperand(0);
9852 SDValue V2 = N->getOperand(1);
9853 unsigned ShiftElts = 0, InsertAtByte = 0;
9854 bool Swap = false;
9855
9856 // Shifts required to get the byte we want at element 7.
9857 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9858 0, 15, 14, 13, 12, 11, 10, 9};
9859 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9860 1, 2, 3, 4, 5, 6, 7, 8};
9861
9862 ArrayRef<int> Mask = N->getMask();
9863 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9864
9865 // For each mask element, find out if we're just inserting something
9866 // from V2 into V1 or vice versa.
9867 // Possible permutations inserting an element from V2 into V1:
9868 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9869 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9870 // ...
9871 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9872 // Inserting from V1 into V2 will be similar, except mask range will be
9873 // [16,31].
9874
9875 bool FoundCandidate = false;
9876 // If both vector operands for the shuffle are the same vector, the mask
9877 // will contain only elements from the first one and the second one will be
9878 // undef.
9879 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9880 // Go through the mask of bytes to find an element that's being moved
9881 // from one vector to the other.
9882 for (unsigned i = 0; i < BytesInVector; ++i) {
9883 unsigned CurrentElement = Mask[i];
9884 // If 2nd operand is undefined, we should only look for element 7 in the
9885 // Mask.
9886 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9887 continue;
9888
9889 bool OtherElementsInOrder = true;
9890 // Examine the other elements in the Mask to see if they're in original
9891 // order.
9892 for (unsigned j = 0; j < BytesInVector; ++j) {
9893 if (j == i)
9894 continue;
9895 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9896 // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
9897 // in which case we assume we're always picking from the 1st operand.
9898 int MaskOffset =
9899 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9900 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9901 OtherElementsInOrder = false;
9902 break;
9903 }
9904 }
9905 // If other elements are in original order, we record the number of shifts
9906 // we need to get the element we want into element 7. Also record which byte
9907 // in the vector we should insert into.
9908 if (OtherElementsInOrder) {
9909 // If 2nd operand is undefined, we assume no shifts and no swapping.
9910 if (V2.isUndef()) {
9911 ShiftElts = 0;
9912 Swap = false;
9913 } else {
9914 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9915 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9916 : BigEndianShifts[CurrentElement & 0xF];
9917 Swap = CurrentElement < BytesInVector;
9918 }
9919 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9920 FoundCandidate = true;
9921 break;
9922 }
9923 }
9924
9925 if (!FoundCandidate)
9926 return SDValue();
9927
9928 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9929 // optionally with VECSHL if shift is required.
9930 if (Swap)
9931 std::swap(V1, V2);
9932 if (V2.isUndef())
9933 V2 = V1;
9934 if (ShiftElts) {
9935 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9936 DAG.getConstant(ShiftElts, dl, MVT::i32));
9937 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9938 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9939 }
9940 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9941 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9942}
9943
9944/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9945/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9946/// SDValue.
9947SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9948 SelectionDAG &DAG) const {
9949 const unsigned NumHalfWords = 8;
9950 const unsigned BytesInVector = NumHalfWords * 2;
9951 // Check that the shuffle is on half-words.
9952 if (!isNByteElemShuffleMask(N, 2, 1))
9953 return SDValue();
9954
9955 bool IsLE = Subtarget.isLittleEndian();
9956 SDLoc dl(N);
9957 SDValue V1 = N->getOperand(0);
9958 SDValue V2 = N->getOperand(1);
9959 unsigned ShiftElts = 0, InsertAtByte = 0;
9960 bool Swap = false;
9961
9962 // Shifts required to get the half-word we want at element 3.
9963 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9964 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9965
9966 uint32_t Mask = 0;
9967 uint32_t OriginalOrderLow = 0x1234567;
9968 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9969 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9970 // 32-bit space, only need 4-bit nibbles per element.
9971 for (unsigned i = 0; i < NumHalfWords; ++i) {
9972 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9973 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9974 }
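// For example, if V1 appears in original order, the even mask entries are 0,2,...,14;
// halving them gives half-word indices 0..7, which pack to 0x01234567 (OriginalOrderLow).
// Entries taken entirely from V2 pack to 0x89ABCDEF (OriginalOrderHigh) likewise.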
9975
9976 // For each mask element, find out if we're just inserting something
9977 // from V2 into V1 or vice versa. Possible permutations inserting an element
9978 // from V2 into V1:
9979 // X, 1, 2, 3, 4, 5, 6, 7
9980 // 0, X, 2, 3, 4, 5, 6, 7
9981 // 0, 1, X, 3, 4, 5, 6, 7
9982 // 0, 1, 2, X, 4, 5, 6, 7
9983 // 0, 1, 2, 3, X, 5, 6, 7
9984 // 0, 1, 2, 3, 4, X, 6, 7
9985 // 0, 1, 2, 3, 4, 5, X, 7
9986 // 0, 1, 2, 3, 4, 5, 6, X
9987 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9988
9989 bool FoundCandidate = false;
9990 // Go through the mask of half-words to find an element that's being moved
9991 // from one vector to the other.
9992 for (unsigned i = 0; i < NumHalfWords; ++i) {
9993 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9994 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9995 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9996 uint32_t TargetOrder = 0x0;
9997
9998 // If both vector operands for the shuffle are the same vector, the mask
9999 // will contain only elements from the first one and the second one will be
10000 // undef.
10001 if (V2.isUndef()) {
10002 ShiftElts = 0;
10003 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10004 TargetOrder = OriginalOrderLow;
10005 Swap = false;
10006 // Skip if this is not the correct element or if the mask of the other
10007 // elements does not match our expected order.
10008 if (MaskOneElt == VINSERTHSrcElem &&
10009 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10010 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10011 FoundCandidate = true;
10012 break;
10013 }
10014 } else { // If both operands are defined.
10015 // Target order is [8,15] if the current mask is between [0,7].
10016 TargetOrder =
10017 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10018 // Skip if the mask of the other elements does not match our expected order.
10019 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10020 // We only need the last 3 bits for the number of shifts.
10021 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10022 : BigEndianShifts[MaskOneElt & 0x7];
10023 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10024 Swap = MaskOneElt < NumHalfWords;
10025 FoundCandidate = true;
10026 break;
10027 }
10028 }
10029 }
10030
10031 if (!FoundCandidate)
10032 return SDValue();
10033
10034 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10035 // optionally with VECSHL if shift is required.
10036 if (Swap)
10037 std::swap(V1, V2);
10038 if (V2.isUndef())
10039 V2 = V1;
10040 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10041 if (ShiftElts) {
10042 // Double ShiftElts because we're left shifting on v16i8 type.
10043 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10044 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10045 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10046 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10047 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10048 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10049 }
10050 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10051 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10052 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10053 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10054}
10055
10056/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10057/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10058/// return the default SDValue.
10059SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10060 SelectionDAG &DAG) const {
10061 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10062 // to v16i8. Peek through the bitcasts to get the actual operands.
10063 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10064 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10065
10066 auto ShuffleMask = SVN->getMask();
10067 SDValue VecShuffle(SVN, 0);
10068 SDLoc DL(SVN);
10069
10070 // Check that we have a four byte shuffle.
10071 if (!isNByteElemShuffleMask(SVN, 4, 1))
10072 return SDValue();
10073
10074 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10075 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10076 std::swap(LHS, RHS);
10077 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10078 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10079 if (!CommutedSV)
10080 return SDValue();
10081 ShuffleMask = CommutedSV->getMask();
10082 }
10083
10084 // Ensure that the RHS is a vector of constants.
10085 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10086 if (!BVN)
10087 return SDValue();
10088
10089 // Check if RHS is a splat of 4-bytes (or smaller).
10090 APInt APSplatValue, APSplatUndef;
10091 unsigned SplatBitSize;
10092 bool HasAnyUndefs;
10093 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10094 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10095 SplatBitSize > 32)
10096 return SDValue();
10097
10098 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10099 // The instruction splats a constant C into two words of the source vector
10100 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10101 // Thus we check that the shuffle mask is the equivalent of
10102 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10103 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10104 // within each word are consecutive, so we only need to check the first byte.
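// For example, with word-sized elements the byte mask
// <0,1,2,3, 16,17,18,19, 8,9,10,11, 16,17,18,19> keeps words 0 and 2 of LHS and fills
// words 1 and 3 with the splatted constant, matching the first pattern checked below.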
10105 SDValue Index;
10106 bool IsLE = Subtarget.isLittleEndian();
10107 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10108 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10109 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10110 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10111 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10112 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10113 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10114 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10115 else
10116 return SDValue();
10117
10118 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10119 // for XXSPLTI32DX.
10120 unsigned SplatVal = APSplatValue.getZExtValue();
10121 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10122 SplatVal |= (SplatVal << SplatBitSize);
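// For example, a byte splat of 0xAB widens to 0xABAB and then to 0xABABABAB.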
10123
10124 SDValue SplatNode = DAG.getNode(
10125 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10126 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10127 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10128}
10129
10130/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10131/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10132/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10133/// i.e (or (shl x, C1), (srl x, 128-C1)).
10134SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10135 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10136 assert(Op.getValueType() == MVT::v1i128 &&
10137 "Only set v1i128 as custom, other type shouldn't reach here!");
10138 SDLoc dl(Op);
10139 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10140 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10141 unsigned SHLAmt = N1.getConstantOperandVal(0);
10142 if (SHLAmt % 8 == 0) {
10143 std::array<int, 16> Mask;
10144 std::iota(Mask.begin(), Mask.end(), 0);
10145 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
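// For example, SHLAmt == 8 produces the byte mask <1, 2, ..., 15, 0>.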
10146 if (SDValue Shuffle =
10147 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10148 DAG.getUNDEF(MVT::v16i8), Mask))
10149 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10150 }
10151 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10152 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10153 DAG.getConstant(SHLAmt, dl, MVT::i32));
10154 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10155 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10156 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10157 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10158}
10159
10160/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10161/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10162/// return the code it can be lowered into. Worst case, it can always be
10163/// lowered into a vperm.
10164SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10165 SelectionDAG &DAG) const {
10166 SDLoc dl(Op);
10167 SDValue V1 = Op.getOperand(0);
10168 SDValue V2 = Op.getOperand(1);
10169 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10170
10171 // Any nodes that were combined in the target-independent combiner prior
10172 // to vector legalization will not be sent to the target combine. Try to
10173 // combine it here.
10174 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10175 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10176 return NewShuffle;
10177 Op = NewShuffle;
10178 SVOp = cast<ShuffleVectorSDNode>(Op);
10179 V1 = Op.getOperand(0);
10180 V2 = Op.getOperand(1);
10181 }
10182 EVT VT = Op.getValueType();
10183 bool isLittleEndian = Subtarget.isLittleEndian();
10184
10185 unsigned ShiftElts, InsertAtByte;
10186 bool Swap = false;
10187
10188 // If this is a load-and-splat, we can do that with a single instruction
10189 // in some cases. However if the load has multiple uses, we don't want to
10190 // combine it because that will just produce multiple loads.
10191 bool IsPermutedLoad = false;
10192 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10193 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10194 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10195 InputLoad->hasOneUse()) {
10196 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10197 int SplatIdx =
10198 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10199
10200 // The splat index for permuted loads will be in the left half of the vector
10201 // which is strictly wider than the loaded value by 8 bytes. So we need to
10202 // adjust the splat index to point to the correct address in memory.
10203 if (IsPermutedLoad) {
10204 assert((isLittleEndian || IsFourByte) &&
10205 "Unexpected size for permuted load on big endian target");
10206 SplatIdx += IsFourByte ? 2 : 1;
10207 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10208 "Splat of a value outside of the loaded memory");
10209 }
10210
10211 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10212 // For 4-byte load-and-splat, we need Power9.
10213 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10214 uint64_t Offset = 0;
10215 if (IsFourByte)
10216 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10217 else
10218 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10219
10220 // If the width of the load is the same as the width of the splat,
10221 // loading with an offset would load the wrong memory.
10222 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10223 Offset = 0;
10224
10225 SDValue BasePtr = LD->getBasePtr();
10226 if (Offset != 0)
10227 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10228 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10229 SDValue Ops[] = {
10230 LD->getChain(), // Chain
10231 BasePtr, // BasePtr
10232 DAG.getValueType(Op.getValueType()) // VT
10233 };
10234 SDVTList VTL =
10235 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10236 SDValue LdSplt =
10237 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10238 Ops, LD->getMemoryVT(), LD->getMemOperand());
10239 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10240 if (LdSplt.getValueType() != SVOp->getValueType(0))
10241 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10242 return LdSplt;
10243 }
10244 }
10245
10246 // All v2i64 and v2f64 shuffles are legal
10247 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10248 return Op;
10249
10250 if (Subtarget.hasP9Vector() &&
10251 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10252 isLittleEndian)) {
10253 if (V2.isUndef())
10254 V2 = V1;
10255 else if (Swap)
10256 std::swap(V1, V2);
10257 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10258 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10259 if (ShiftElts) {
10260 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10261 DAG.getConstant(ShiftElts, dl, MVT::i32));
10262 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10263 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10264 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10265 }
10266 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10267 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10268 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10269 }
10270
10271 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10272 SDValue SplatInsertNode;
10273 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10274 return SplatInsertNode;
10275 }
10276
10277 if (Subtarget.hasP9Altivec()) {
10278 SDValue NewISDNode;
10279 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10280 return NewISDNode;
10281
10282 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10283 return NewISDNode;
10284 }
10285
10286 if (Subtarget.hasVSX() &&
10287 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10288 if (Swap)
10289 std::swap(V1, V2);
10290 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10291 SDValue Conv2 =
10292 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10293
10294 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10295 DAG.getConstant(ShiftElts, dl, MVT::i32));
10296 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10297 }
10298
10299 if (Subtarget.hasVSX() &&
10300 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10301 if (Swap)
10302 std::swap(V1, V2);
10303 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10304 SDValue Conv2 =
10305 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10306
10307 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10308 DAG.getConstant(ShiftElts, dl, MVT::i32));
10309 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10310 }
10311
10312 if (Subtarget.hasP9Vector()) {
10313 if (PPC::isXXBRHShuffleMask(SVOp)) {
10314 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10315 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10316 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10317 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10318 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10319 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10320 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10321 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10322 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10323 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10324 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10325 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10326 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10327 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10328 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10329 }
10330 }
10331
10332 if (Subtarget.hasVSX()) {
10333 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10334 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10335
10336 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10337 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10338 DAG.getConstant(SplatIdx, dl, MVT::i32));
10339 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10340 }
10341
10342 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10343 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10344 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10345 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10346 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10347 }
10348 }
10349
10350 // Cases that are handled by instructions that take permute immediates
10351 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10352 // selected by the instruction selector.
10353 if (V2.isUndef()) {
10354 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10355 PPC::isSplatShuffleMask(SVOp, 2) ||
10356 PPC::isSplatShuffleMask(SVOp, 4) ||
10357 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10358 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10359 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10360 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10361 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10362 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10363 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10364 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10365 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10366 (Subtarget.hasP8Altivec() && (
10367 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10368 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10369 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10370 return Op;
10371 }
10372 }
10373
10374 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10375 // and produce a fixed permutation. If any of these match, do not lower to
10376 // VPERM.
10377 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10378 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10379 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10380 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10381 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10382 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10383 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10384 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10385 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10386 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10387 (Subtarget.hasP8Altivec() && (
10388 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10389 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10390 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10391 return Op;
10392
10393 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10394 // perfect shuffle table to emit an optimal matching sequence.
10395 ArrayRef<int> PermMask = SVOp->getMask();
10396
10397 if (!DisablePerfectShuffle && !isLittleEndian) {
10398 unsigned PFIndexes[4];
10399 bool isFourElementShuffle = true;
10400 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10401 ++i) { // Element number
10402 unsigned EltNo = 8; // Start out undef.
10403 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10404 if (PermMask[i * 4 + j] < 0)
10405 continue; // Undef, ignore it.
10406
10407 unsigned ByteSource = PermMask[i * 4 + j];
10408 if ((ByteSource & 3) != j) {
10409 isFourElementShuffle = false;
10410 break;
10411 }
10412
10413 if (EltNo == 8) {
10414 EltNo = ByteSource / 4;
10415 } else if (EltNo != ByteSource / 4) {
10416 isFourElementShuffle = false;
10417 break;
10418 }
10419 }
10420 PFIndexes[i] = EltNo;
10421 }
10422
10423 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10424 // perfect shuffle vector to determine if it is cost effective to do this as
10425 // discrete instructions, or whether we should use a vperm.
10426 // For now, we skip this for little endian until such time as we have a
10427 // little-endian perfect shuffle table.
10428 if (isFourElementShuffle) {
10429 // Compute the index in the perfect shuffle table.
10430 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10431 PFIndexes[2] * 9 + PFIndexes[3];
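// For example, PFIndexes <0,1,2,3> (the identity) gives 0*729 + 1*81 + 2*9 + 3 == 102.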
10432
10433 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10434 unsigned Cost = (PFEntry >> 30);
10435
10436 // Determining when to avoid vperm is tricky. Many things affect the cost
10437 // of vperm, particularly how many times the perm mask needs to be
10438 // computed. For example, if the perm mask can be hoisted out of a loop or
10439 // is already used (perhaps because there are multiple permutes with the
10440 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10441 // permute mask out of the loop requires an extra register.
10442 //
10443 // As a compromise, we only emit discrete instructions if the shuffle can
10444 // be generated in 3 or fewer operations. When we have loop information
10445 // available, if this block is within a loop, we should avoid using vperm
10446 // for 3-operation perms and use a constant pool load instead.
10447 if (Cost < 3)
10448 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10449 }
10450 }
10451
10452 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10453 // vector that will get spilled to the constant pool.
10454 if (V2.isUndef()) V2 = V1;
10455
10456 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10457}
10458
10459SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10460 ArrayRef<int> PermMask, EVT VT,
10461 SDValue V1, SDValue V2) const {
10462 unsigned Opcode = PPCISD::VPERM;
10463 EVT ValType = V1.getValueType();
10464 SDLoc dl(Op);
10465 bool NeedSwap = false;
10466 bool isLittleEndian = Subtarget.isLittleEndian();
10467 bool isPPC64 = Subtarget.isPPC64();
10468
10469 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10470 (V1->hasOneUse() || V2->hasOneUse())) {
10471 LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - using "
10472 "XXPERM instead\n");
10473 Opcode = PPCISD::XXPERM;
10474
10475 // The second input to XXPERM is also an output so if the second input has
10476 // multiple uses then copying is necessary, as a result we want the
10477 // single-use operand to be used as the second input to prevent copying.
10478 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10479 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10480 std::swap(V1, V2);
10481 NeedSwap = !NeedSwap;
10482 }
10483 }
10484
10485 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10486 // that it is in input element units, not in bytes. Convert now.
10487
10488 // For little endian, the order of the input vectors is reversed, and
10489 // the permutation mask is complemented with respect to 31. This is
10490 // necessary to produce proper semantics with the big-endian-based vperm
10491 // instruction.
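// For example, logical mask byte 0 becomes control value 31 on little endian, byte 1
// becomes 30, and so on; the operand swap itself happens just before building the node.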
10492 EVT EltVT = V1.getValueType().getVectorElementType();
10493 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10494
10495 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10496 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10497
10498 /*
10499 Vectors will be appended like so: [ V1 | v2 ]
10500 XXSWAPD on V1:
10501 [ A | B | C | D ] -> [ C | D | A | B ]
10502 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10503 i.e. index of A, B += 8, and index of C, D -= 8.
10504 XXSWAPD on V2:
10505 [ E | F | G | H ] -> [ G | H | E | F ]
10506 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10507 i.e. index of E, F += 8, index of G, H -= 8
10508 Swap V1 and V2:
10509 [ V1 | V2 ] -> [ V2 | V1 ]
10510 0-15 16-31 0-15 16-31
10511 i.e. index of V1 += 16, index of V2 -= 16
10512 */
10513
10514 SmallVector<SDValue, 16> ResultMask;
10515 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10516 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10517
10518 if (V1HasXXSWAPD) {
10519 if (SrcElt < 8)
10520 SrcElt += 8;
10521 else if (SrcElt < 16)
10522 SrcElt -= 8;
10523 }
10524 if (V2HasXXSWAPD) {
10525 if (SrcElt > 23)
10526 SrcElt -= 8;
10527 else if (SrcElt > 15)
10528 SrcElt += 8;
10529 }
10530 if (NeedSwap) {
10531 if (SrcElt < 16)
10532 SrcElt += 16;
10533 else
10534 SrcElt -= 16;
10535 }
10536 for (unsigned j = 0; j != BytesPerElement; ++j)
10537 if (isLittleEndian)
10538 ResultMask.push_back(
10539 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10540 else
10541 ResultMask.push_back(
10542 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10543 }
10544
10545 if (V1HasXXSWAPD) {
10546 dl = SDLoc(V1->getOperand(0));
10547 V1 = V1->getOperand(0)->getOperand(1);
10548 }
10549 if (V2HasXXSWAPD) {
10550 dl = SDLoc(V2->getOperand(0));
10551 V2 = V2->getOperand(0)->getOperand(1);
10552 }
10553
10554 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10555 if (ValType != MVT::v2f64)
10556 V1 = DAG.getBitcast(MVT::v2f64, V1);
10557 if (V2.getValueType() != MVT::v2f64)
10558 V2 = DAG.getBitcast(MVT::v2f64, V2);
10559 }
10560
10561 ShufflesHandledWithVPERM++;
10562 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10563 LLVM_DEBUG({
10564 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10565 if (Opcode == PPCISD::XXPERM) {
10566 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10567 } else {
10568 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10569 }
10570 SVOp->dump();
10571 dbgs() << "With the following permute control vector:\n";
10572 VPermMask.dump();
10573 });
10574
10575 if (Opcode == PPCISD::XXPERM)
10576 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10577
10578 // For LE we only need to swap the operand order here; the permute mask
10579 // above was already computed with this swap in mind.
10580 if (isLittleEndian)
10581 std::swap(V1, V2);
10582
10583 SDValue VPERMNode =
10584 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10585
10586 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10587 return VPERMNode;
10588}
10589
10590/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10591/// vector comparison. If it is, return true and fill in Opc/isDot with
10592/// information about the intrinsic.
10593static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10594 bool &isDot, const PPCSubtarget &Subtarget) {
10595 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10596 CompareOpc = -1;
10597 isDot = false;
10598 switch (IntrinsicID) {
10599 default:
10600 return false;
10601 // Comparison predicates.
10602 case Intrinsic::ppc_altivec_vcmpbfp_p:
10603 CompareOpc = 966;
10604 isDot = true;
10605 break;
10606 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10607 CompareOpc = 198;
10608 isDot = true;
10609 break;
10610 case Intrinsic::ppc_altivec_vcmpequb_p:
10611 CompareOpc = 6;
10612 isDot = true;
10613 break;
10614 case Intrinsic::ppc_altivec_vcmpequh_p:
10615 CompareOpc = 70;
10616 isDot = true;
10617 break;
10618 case Intrinsic::ppc_altivec_vcmpequw_p:
10619 CompareOpc = 134;
10620 isDot = true;
10621 break;
10622 case Intrinsic::ppc_altivec_vcmpequd_p:
10623 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10624 CompareOpc = 199;
10625 isDot = true;
10626 } else
10627 return false;
10628 break;
10629 case Intrinsic::ppc_altivec_vcmpneb_p:
10630 case Intrinsic::ppc_altivec_vcmpneh_p:
10631 case Intrinsic::ppc_altivec_vcmpnew_p:
10632 case Intrinsic::ppc_altivec_vcmpnezb_p:
10633 case Intrinsic::ppc_altivec_vcmpnezh_p:
10634 case Intrinsic::ppc_altivec_vcmpnezw_p:
10635 if (Subtarget.hasP9Altivec()) {
10636 switch (IntrinsicID) {
10637 default:
10638 llvm_unreachable("Unknown comparison intrinsic.");
10639 case Intrinsic::ppc_altivec_vcmpneb_p:
10640 CompareOpc = 7;
10641 break;
10642 case Intrinsic::ppc_altivec_vcmpneh_p:
10643 CompareOpc = 71;
10644 break;
10645 case Intrinsic::ppc_altivec_vcmpnew_p:
10646 CompareOpc = 135;
10647 break;
10648 case Intrinsic::ppc_altivec_vcmpnezb_p:
10649 CompareOpc = 263;
10650 break;
10651 case Intrinsic::ppc_altivec_vcmpnezh_p:
10652 CompareOpc = 327;
10653 break;
10654 case Intrinsic::ppc_altivec_vcmpnezw_p:
10655 CompareOpc = 391;
10656 break;
10657 }
10658 isDot = true;
10659 } else
10660 return false;
10661 break;
10662 case Intrinsic::ppc_altivec_vcmpgefp_p:
10663 CompareOpc = 454;
10664 isDot = true;
10665 break;
10666 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10667 CompareOpc = 710;
10668 isDot = true;
10669 break;
10670 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10671 CompareOpc = 774;
10672 isDot = true;
10673 break;
10674 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10675 CompareOpc = 838;
10676 isDot = true;
10677 break;
10678 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10679 CompareOpc = 902;
10680 isDot = true;
10681 break;
10682 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10683 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10684 CompareOpc = 967;
10685 isDot = true;
10686 } else
10687 return false;
10688 break;
10689 case Intrinsic::ppc_altivec_vcmpgtub_p:
10690 CompareOpc = 518;
10691 isDot = true;
10692 break;
10693 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10694 CompareOpc = 582;
10695 isDot = true;
10696 break;
10697 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10698 CompareOpc = 646;
10699 isDot = true;
10700 break;
10701 case Intrinsic::ppc_altivec_vcmpgtud_p:
10702 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10703 CompareOpc = 711;
10704 isDot = true;
10705 } else
10706 return false;
10707 break;
10708
10709 case Intrinsic::ppc_altivec_vcmpequq:
10710 case Intrinsic::ppc_altivec_vcmpgtsq:
10711 case Intrinsic::ppc_altivec_vcmpgtuq:
10712 if (!Subtarget.isISA3_1())
10713 return false;
10714 switch (IntrinsicID) {
10715 default:
10716 llvm_unreachable("Unknown comparison intrinsic.");
10717 case Intrinsic::ppc_altivec_vcmpequq:
10718 CompareOpc = 455;
10719 break;
10720 case Intrinsic::ppc_altivec_vcmpgtsq:
10721 CompareOpc = 903;
10722 break;
10723 case Intrinsic::ppc_altivec_vcmpgtuq:
10724 CompareOpc = 647;
10725 break;
10726 }
10727 break;
10728
10729 // VSX predicate comparisons use the same infrastructure
10730 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10731 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10732 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10733 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10734 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10735 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10736 if (Subtarget.hasVSX()) {
10737 switch (IntrinsicID) {
10738 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10739 CompareOpc = 99;
10740 break;
10741 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10742 CompareOpc = 115;
10743 break;
10744 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10745 CompareOpc = 107;
10746 break;
10747 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10748 CompareOpc = 67;
10749 break;
10750 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10751 CompareOpc = 83;
10752 break;
10753 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10754 CompareOpc = 75;
10755 break;
10756 }
10757 isDot = true;
10758 } else
10759 return false;
10760 break;
10761
10762 // Normal Comparisons.
10763 case Intrinsic::ppc_altivec_vcmpbfp:
10764 CompareOpc = 966;
10765 break;
10766 case Intrinsic::ppc_altivec_vcmpeqfp:
10767 CompareOpc = 198;
10768 break;
10769 case Intrinsic::ppc_altivec_vcmpequb:
10770 CompareOpc = 6;
10771 break;
10772 case Intrinsic::ppc_altivec_vcmpequh:
10773 CompareOpc = 70;
10774 break;
10775 case Intrinsic::ppc_altivec_vcmpequw:
10776 CompareOpc = 134;
10777 break;
10778 case Intrinsic::ppc_altivec_vcmpequd:
10779 if (Subtarget.hasP8Altivec())
10780 CompareOpc = 199;
10781 else
10782 return false;
10783 break;
10784 case Intrinsic::ppc_altivec_vcmpneb:
10785 case Intrinsic::ppc_altivec_vcmpneh:
10786 case Intrinsic::ppc_altivec_vcmpnew:
10787 case Intrinsic::ppc_altivec_vcmpnezb:
10788 case Intrinsic::ppc_altivec_vcmpnezh:
10789 case Intrinsic::ppc_altivec_vcmpnezw:
10790 if (Subtarget.hasP9Altivec())
10791 switch (IntrinsicID) {
10792 default:
10793 llvm_unreachable("Unknown comparison intrinsic.");
10794 case Intrinsic::ppc_altivec_vcmpneb:
10795 CompareOpc = 7;
10796 break;
10797 case Intrinsic::ppc_altivec_vcmpneh:
10798 CompareOpc = 71;
10799 break;
10800 case Intrinsic::ppc_altivec_vcmpnew:
10801 CompareOpc = 135;
10802 break;
10803 case Intrinsic::ppc_altivec_vcmpnezb:
10804 CompareOpc = 263;
10805 break;
10806 case Intrinsic::ppc_altivec_vcmpnezh:
10807 CompareOpc = 327;
10808 break;
10809 case Intrinsic::ppc_altivec_vcmpnezw:
10810 CompareOpc = 391;
10811 break;
10812 }
10813 else
10814 return false;
10815 break;
10816 case Intrinsic::ppc_altivec_vcmpgefp:
10817 CompareOpc = 454;
10818 break;
10819 case Intrinsic::ppc_altivec_vcmpgtfp:
10820 CompareOpc = 710;
10821 break;
10822 case Intrinsic::ppc_altivec_vcmpgtsb:
10823 CompareOpc = 774;
10824 break;
10825 case Intrinsic::ppc_altivec_vcmpgtsh:
10826 CompareOpc = 838;
10827 break;
10828 case Intrinsic::ppc_altivec_vcmpgtsw:
10829 CompareOpc = 902;
10830 break;
10831 case Intrinsic::ppc_altivec_vcmpgtsd:
10832 if (Subtarget.hasP8Altivec())
10833 CompareOpc = 967;
10834 else
10835 return false;
10836 break;
10837 case Intrinsic::ppc_altivec_vcmpgtub:
10838 CompareOpc = 518;
10839 break;
10840 case Intrinsic::ppc_altivec_vcmpgtuh:
10841 CompareOpc = 582;
10842 break;
10843 case Intrinsic::ppc_altivec_vcmpgtuw:
10844 CompareOpc = 646;
10845 break;
10846 case Intrinsic::ppc_altivec_vcmpgtud:
10847 if (Subtarget.hasP8Altivec())
10848 CompareOpc = 711;
10849 else
10850 return false;
10851 break;
10852 case Intrinsic::ppc_altivec_vcmpequq_p:
10853 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10854 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10855 if (!Subtarget.isISA3_1())
10856 return false;
10857 switch (IntrinsicID) {
10858 default:
10859 llvm_unreachable("Unknown comparison intrinsic.");
10860 case Intrinsic::ppc_altivec_vcmpequq_p:
10861 CompareOpc = 455;
10862 break;
10863 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10864 CompareOpc = 903;
10865 break;
10866 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10867 CompareOpc = 647;
10868 break;
10869 }
10870 isDot = true;
10871 break;
10872 }
10873 return true;
10874}
10875
10876/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10877/// lower, do it, otherwise return null.
10878SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10879 SelectionDAG &DAG) const {
10880 unsigned IntrinsicID = Op.getConstantOperandVal(0);
10881
10882 SDLoc dl(Op);
10883
10884 switch (IntrinsicID) {
10885 case Intrinsic::thread_pointer:
10886 // Reads the thread pointer register, used for __builtin_thread_pointer.
10887 if (Subtarget.isPPC64())
10888 return DAG.getRegister(PPC::X13, MVT::i64);
10889 return DAG.getRegister(PPC::R2, MVT::i32);
10890
10891 case Intrinsic::ppc_rldimi: {
10892 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
10893 SDValue Src = Op.getOperand(1);
10894 APInt Mask = Op.getConstantOperandAPInt(4);
10895 if (Mask.isZero())
10896 return Op.getOperand(2);
10897 if (Mask.isAllOnes())
10898 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
10899 uint64_t SH = Op.getConstantOperandVal(3);
10900 unsigned MB = 0, ME = 0;
10901 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
10902 report_fatal_error("invalid rldimi mask!");
10903 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
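// The RLDIMI emitted below always rotates by 63 - ME, so Src is pre-rotated such that
// the two rotations compose to SH: (ME + SH + 1) + (63 - ME) == SH + 64, i.e. SH modulo
// 64, and (ME + SH - 63) + (63 - ME) == SH.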
10904 if (ME < 63 - SH) {
10905 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10906 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10907 } else if (ME > 63 - SH) {
10908 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10909 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10910 }
10911 return SDValue(
10912 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10913 {Op.getOperand(2), Src,
10914 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10915 DAG.getTargetConstant(MB, dl, MVT::i32)}),
10916 0);
10917 }
10918
10919 case Intrinsic::ppc_rlwimi: {
10920 APInt Mask = Op.getConstantOperandAPInt(4);
10921 if (Mask.isZero())
10922 return Op.getOperand(2);
10923 if (Mask.isAllOnes())
10924 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
10925 Op.getOperand(3));
10926 unsigned MB = 0, ME = 0;
10927 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
10928 report_fatal_error("invalid rlwimi mask!");
10929 return SDValue(DAG.getMachineNode(
10930 PPC::RLWIMI, dl, MVT::i32,
10931 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10932 DAG.getTargetConstant(MB, dl, MVT::i32),
10933 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10934 0);
10935 }
10936
10937 case Intrinsic::ppc_rlwnm: {
10938 if (Op.getConstantOperandVal(3) == 0)
10939 return DAG.getConstant(0, dl, MVT::i32);
10940 unsigned MB = 0, ME = 0;
10941 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
10942 report_fatal_error("invalid rlwnm mask!");
10943 return SDValue(
10944 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
10945 {Op.getOperand(1), Op.getOperand(2),
10946 DAG.getTargetConstant(MB, dl, MVT::i32),
10947 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10948 0);
10949 }
10950
10951 case Intrinsic::ppc_mma_disassemble_acc: {
10952 if (Subtarget.isISAFuture()) {
10953 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10954 SDValue WideVec =
10955 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
10956 Op.getOperand(1)),
10957 0);
10958 SmallVector<SDValue, 4> RetOps;
10959 SDValue Value = SDValue(WideVec.getNode(), 0);
10960 SDValue Value2 = SDValue(WideVec.getNode(), 1);
10961
10962 SDValue Extract;
10963 Extract = DAG.getNode(
10964 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10965 Subtarget.isLittleEndian() ? Value2 : Value,
10966 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10967 dl, getPointerTy(DAG.getDataLayout())));
10968 RetOps.push_back(Extract);
10969 Extract = DAG.getNode(
10970 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10971 Subtarget.isLittleEndian() ? Value2 : Value,
10972 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10973 dl, getPointerTy(DAG.getDataLayout())));
10974 RetOps.push_back(Extract);
10975 Extract = DAG.getNode(
10976 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10977 Subtarget.isLittleEndian() ? Value : Value2,
10978 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10979 dl, getPointerTy(DAG.getDataLayout())));
10980 RetOps.push_back(Extract);
10981 Extract = DAG.getNode(
10982 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10983 Subtarget.isLittleEndian() ? Value : Value2,
10984 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10985 dl, getPointerTy(DAG.getDataLayout())));
10986 RetOps.push_back(Extract);
10987 return DAG.getMergeValues(RetOps, dl);
10988 }
10989 [[fallthrough]];
10990 }
10991 case Intrinsic::ppc_vsx_disassemble_pair: {
10992 int NumVecs = 2;
10993 SDValue WideVec = Op.getOperand(1);
10994 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10995 NumVecs = 4;
10996 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10997 }
10998 SmallVector<SDValue, 4> RetOps;
10999 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11000 SDValue Extract = DAG.getNode(
11001 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11002 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11003 : VecNo,
11004 dl, getPointerTy(DAG.getDataLayout())));
11005 RetOps.push_back(Extract);
11006 }
11007 return DAG.getMergeValues(RetOps, dl);
11008 }
11009
11010 case Intrinsic::ppc_mma_xxmfacc:
11011 case Intrinsic::ppc_mma_xxmtacc: {
11012 // Allow pre-isa-future subtargets to lower as normal.
11013 if (!Subtarget.isISAFuture())
11014 return SDValue();
11015 // The intrinsics for xxmtacc and xxmfacc take one argument of
11016 // type v512i1. For future CPUs the corresponding wacc instruction
11017 // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
11018 // the need to produce the xxm[t|f]acc.
11019 SDValue WideVec = Op.getOperand(1);
11020 DAG.ReplaceAllUsesWith(Op, WideVec);
11021 return SDValue();
11022 }
11023
11024 case Intrinsic::ppc_unpack_longdouble: {
11025 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11026 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11027 "Argument of long double unpack must be 0 or 1!");
11028 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11029 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11030 Idx->getValueType(0)));
11031 }
11032
11033 case Intrinsic::ppc_compare_exp_lt:
11034 case Intrinsic::ppc_compare_exp_gt:
11035 case Intrinsic::ppc_compare_exp_eq:
11036 case Intrinsic::ppc_compare_exp_uo: {
11037 unsigned Pred;
11038 switch (IntrinsicID) {
11039 case Intrinsic::ppc_compare_exp_lt:
11040 Pred = PPC::PRED_LT;
11041 break;
11042 case Intrinsic::ppc_compare_exp_gt:
11043 Pred = PPC::PRED_GT;
11044 break;
11045 case Intrinsic::ppc_compare_exp_eq:
11046 Pred = PPC::PRED_EQ;
11047 break;
11048 case Intrinsic::ppc_compare_exp_uo:
11049 Pred = PPC::PRED_UN;
11050 break;
11051 }
11052 return SDValue(
11053 DAG.getMachineNode(
11054 PPC::SELECT_CC_I4, dl, MVT::i32,
11055 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11056 Op.getOperand(1), Op.getOperand(2)),
11057 0),
11058 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11059 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11060 0);
11061 }
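  // Select the data-class test instruction that matches the operand width
  // (XSTSTDCQP for f128, XSTSTDCDP for f64, XSTSTDCSP otherwise) and produce
  // 1 or 0 from the resulting CR EQ bit.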
11062 case Intrinsic::ppc_test_data_class: {
11063 EVT OpVT = Op.getOperand(1).getValueType();
11064 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11065 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11066 : PPC::XSTSTDCSP);
11067 return SDValue(
11068 DAG.getMachineNode(
11069 PPC::SELECT_CC_I4, dl, MVT::i32,
11070 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11071 Op.getOperand(1)),
11072 0),
11073 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11074 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11075 0);
11076 }
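  // Without VSX (or without full f128 support when the type is f128), expand
  // fnmsub as -(fma(a, b, -c)); otherwise emit the target FNMSUB node directly.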
11077 case Intrinsic::ppc_fnmsub: {
11078 EVT VT = Op.getOperand(1).getValueType();
11079 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11080 return DAG.getNode(
11081 ISD::FNEG, dl, VT,
11082 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11083 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11084 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11085 Op.getOperand(2), Op.getOperand(3));
11086 }
11087 case Intrinsic::ppc_convert_f128_to_ppcf128:
11088 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11089 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11090 ? RTLIB::CONVERT_PPCF128_F128
11091 : RTLIB::CONVERT_F128_PPCF128;
11092 MakeLibCallOptions CallOptions;
11093 std::pair<SDValue, SDValue> Result =
11094 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11095 dl, SDValue());
11096 return Result.first;
11097 }
11098 case Intrinsic::ppc_maxfe:
11099 case Intrinsic::ppc_maxfl:
11100 case Intrinsic::ppc_maxfs:
11101 case Intrinsic::ppc_minfe:
11102 case Intrinsic::ppc_minfl:
11103 case Intrinsic::ppc_minfs: {
11104 EVT VT = Op.getValueType();
11105 assert(
11106 all_of(Op->ops().drop_front(4),
11107 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11108 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11109     (void)VT;
11110     ISD::CondCode CC = ISD::SETGT;
11111 if (IntrinsicID == Intrinsic::ppc_minfe ||
11112 IntrinsicID == Intrinsic::ppc_minfl ||
11113 IntrinsicID == Intrinsic::ppc_minfs)
11114 CC = ISD::SETLT;
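    // Reduce the operands with a chain of select_cc nodes, carrying forward
    // the larger (or, for the min intrinsics, smaller) value at each step.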
11115 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11116 SDValue Res = Op.getOperand(I);
11117 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11118 Res =
11119 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11120 }
11121 return Res;
11122 }
11123 }
11124
11125 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11126 // opcode number of the comparison.
11127 int CompareOpc;
11128 bool isDot;
11129 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11130 return SDValue(); // Don't custom lower most intrinsics.
11131
11132 // If this is a non-dot comparison, make the VCMP node and we are done.
11133 if (!isDot) {
11134 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11135 Op.getOperand(1), Op.getOperand(2),
11136 DAG.getConstant(CompareOpc, dl, MVT::i32));
11137 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11138 }
11139
11140 // Create the PPCISD altivec 'dot' comparison node.
11141 SDValue Ops[] = {
11142 Op.getOperand(2), // LHS
11143 Op.getOperand(3), // RHS
11144 DAG.getConstant(CompareOpc, dl, MVT::i32)
11145 };
11146 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11147 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11148
11149 // Now that we have the comparison, emit a copy from the CR to a GPR.
11150 // This is flagged to the above dot comparison.
11151 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11152 DAG.getRegister(PPC::CR6, MVT::i32),
11153 CompNode.getValue(1));
11154
11155 // Unpack the result based on how the target uses it.
11156 unsigned BitNo; // Bit # of CR6.
11157 bool InvertBit; // Invert result?
11158 switch (Op.getConstantOperandVal(1)) {
11159 default: // Can't happen, don't crash on invalid number though.
11160 case 0: // Return the value of the EQ bit of CR6.
11161 BitNo = 0; InvertBit = false;
11162 break;
11163 case 1: // Return the inverted value of the EQ bit of CR6.
11164 BitNo = 0; InvertBit = true;
11165 break;
11166 case 2: // Return the value of the LT bit of CR6.
11167 BitNo = 2; InvertBit = false;
11168 break;
11169 case 3: // Return the inverted value of the LT bit of CR6.
11170 BitNo = 2; InvertBit = true;
11171 break;
11172 }
11173
11174 // Shift the bit into the low position.
11175 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11176 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11177 // Isolate the bit.
11178 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11179 DAG.getConstant(1, dl, MVT::i32));
11180
11181 // If we are supposed to, toggle the bit.
11182 if (InvertBit)
11183 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11184 DAG.getConstant(1, dl, MVT::i32));
11185 return Flags;
11186}
11187
11188SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11189 SelectionDAG &DAG) const {
11190   // SelectionDAGBuilder::visitTargetIntrinsic may insert an extra chain operand
11191   // at the beginning of the argument list.
11192 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11193 SDLoc DL(Op);
11194 switch (Op.getConstantOperandVal(ArgStart)) {
11195 case Intrinsic::ppc_cfence: {
11196 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11197 SDValue Val = Op.getOperand(ArgStart + 1);
11198 EVT Ty = Val.getValueType();
11199 if (Ty == MVT::i128) {
11200 // FIXME: Testing one of two paired registers is sufficient to guarantee
11201 // ordering?
11202 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11203 }
11204 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11205 EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
11206 return SDValue(
11207 DAG.getMachineNode(Opcode, DL, MVT::Other,
11208 DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
11209 Op.getOperand(0)),
11210 0);
11211 }
11212 default:
11213 break;
11214 }
11215 return SDValue();
11216}
11217
11218// Lower scalar BSWAP64 to xxbrd.
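// The scalar is splatted into both doublewords of a VSX register, byte-swapped
// as a v2i64, and the swapped doubleword is then moved back to a GPR.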
11219SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11220 SDLoc dl(Op);
11221 if (!Subtarget.isPPC64())
11222 return Op;
11223 // MTVSRDD
11224 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11225 Op.getOperand(0));
11226 // XXBRD
11227 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11228 // MFVSRD
11229 int VectorIndex = 0;
11230 if (Subtarget.isLittleEndian())
11231 VectorIndex = 1;
11232 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11233 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11234 return Op;
11235}
11236
11237// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11238// compared to a value that is atomically loaded (atomic loads zero-extend).
11239SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11240 SelectionDAG &DAG) const {
11241 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11242 "Expecting an atomic compare-and-swap here.");
11243 SDLoc dl(Op);
11244 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11245 EVT MemVT = AtomicNode->getMemoryVT();
11246 if (MemVT.getSizeInBits() >= 32)
11247 return Op;
11248
11249 SDValue CmpOp = Op.getOperand(2);
11250 // If this is already correctly zero-extended, leave it alone.
11251 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11252 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11253 return Op;
11254
11255 // Clear the high bits of the compare operand.
11256 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11257 SDValue NewCmpOp =
11258 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11259 DAG.getConstant(MaskVal, dl, MVT::i32));
11260
11261   // Replace the existing compare operand with the properly zero-extended one.
11262   SmallVector<SDValue, 4> Ops;
11263 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11264 Ops.push_back(AtomicNode->getOperand(i));
11265 Ops[2] = NewCmpOp;
11266 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11267 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11268   auto NodeTy =
11269       (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11270 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11271}
11272
11273SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11274 SelectionDAG &DAG) const {
11275 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11276 EVT MemVT = N->getMemoryVT();
11277 assert(MemVT.getSimpleVT() == MVT::i128 &&
11278 "Expect quadword atomic operations");
11279 SDLoc dl(N);
11280 unsigned Opc = N->getOpcode();
11281 switch (Opc) {
11282 case ISD::ATOMIC_LOAD: {
11283     // Lower a quadword atomic load to int_ppc_atomic_load_i128, which will be
11284     // lowered to PPC instructions by the pattern-matching instruction selector.
11285     SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11286     SmallVector<SDValue, 4> Ops{
11287 N->getOperand(0),
11288 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11289 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11290 Ops.push_back(N->getOperand(I));
11291 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11292 Ops, MemVT, N->getMemOperand());
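    // Reassemble the i128 result from the two i64 halves returned by the
    // intrinsic: zero-extend both, shift the high half left by 64 and OR.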
11293 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11294 SDValue ValHi =
11295 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11296 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11297 DAG.getConstant(64, dl, MVT::i32));
11298 SDValue Val =
11299 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11300 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11301 {Val, LoadedVal.getValue(2)});
11302 }
11303 case ISD::ATOMIC_STORE: {
11304     // Lower a quadword atomic store to int_ppc_atomic_store_i128, which will be
11305     // lowered to PPC instructions by the pattern-matching instruction selector.
11306     SDVTList Tys = DAG.getVTList(MVT::Other);
11307     SmallVector<SDValue, 4> Ops{
11308 N->getOperand(0),
11309 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
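    // Split the i128 value into its low and high 64-bit halves; these become
    // the two value operands of the store intrinsic.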
11310 SDValue Val = N->getOperand(1);
11311 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11312 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11313 DAG.getConstant(64, dl, MVT::i32));
11314 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11315 Ops.push_back(ValLo);
11316 Ops.push_back(ValHi);
11317 Ops.push_back(N->getOperand(2));
11318 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11319 N->getMemOperand());
11320 }
11321 default:
11322 llvm_unreachable("Unexpected atomic opcode");
11323 }
11324}
11325 
11326 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11327 SelectionDAG &DAG,
11328 const PPCSubtarget &Subtarget) {
11329 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11330
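  // Bit positions of the DCMX mask operand accepted by the xststdc[qp|dp|sp]
  // data-class test instructions.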
11331 enum DataClassMask {
11332 DC_NAN = 1 << 6,
11333 DC_NEG_INF = 1 << 4,
11334 DC_POS_INF = 1 << 5,
11335 DC_NEG_ZERO = 1 << 2,
11336 DC_POS_ZERO = 1 << 3,
11337 DC_NEG_SUBNORM = 1,
11338 DC_POS_SUBNORM = 1 << 1,
11339 };
11340
11341 EVT VT = Op.getValueType();
11342
11343 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11344 : VT == MVT::f64 ? PPC::XSTSTDCDP
11345 : PPC::XSTSTDCSP;
11346
11347 if (Mask == fcAllFlags)
11348 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11349 if (Mask == 0)
11350 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11351
11352   // When it is cheaper or necessary, test the inverted set of flags instead.
11353 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11354 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11355 return DAG.getNOT(Dl, Rev, MVT::i1);
11356 }
11357
11358   // PowerPC cannot test directly whether a value is 'normal'. Test every other
11359   // class first, then check for 'not not-normal' with the expected sign.
11360 if (Mask & fcNormal) {
11361 SDValue Rev(DAG.getMachineNode(
11362 TestOp, Dl, MVT::i32,
11363 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11364 DC_NEG_ZERO | DC_POS_ZERO |
11365 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11366 Dl, MVT::i32),
11367 Op),
11368 0);
11369     // The sign is stored in CR bit 0, the result in CR bit 2.
11370 SDValue Sign(
11371 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11372 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11373 0);
11374 SDValue Normal(DAG.getNOT(
11375         Dl,
11376         SDValue(DAG.getMachineNode(
11377 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11378 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11379 0),
11380 MVT::i1));
11381 if (Mask & fcPosNormal)
11382 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11383 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11384 if (Mask == fcPosNormal || Mask == fcNegNormal)
11385 return Result;
11386
11387 return DAG.getNode(
11388 ISD::OR, Dl, MVT::i1,
11389 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11390 }
11391
11392   // The instruction doesn't differentiate between signaling and quiet NaNs. Test
11393   // the other classes first, then test whether it 'is NaN and is signaling/quiet'.
11394 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11395 bool IsQuiet = Mask & fcQNan;
11396 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11397
11398       // Quietness is determined by the first bit of the fraction field.
11399 uint64_t QuietMask = 0;
11400 SDValue HighWord;
11401 if (VT == MVT::f128) {
11402 HighWord = DAG.getNode(
11403 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11404 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11405 QuietMask = 0x8000;
11406 } else if (VT == MVT::f64) {
11407 if (Subtarget.isPPC64()) {
11408 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11409 DAG.getBitcast(MVT::i64, Op),
11410 DAG.getConstant(1, Dl, MVT::i32));
11411 } else {
11412 SDValue Vec = DAG.getBitcast(
11413 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11414 HighWord = DAG.getNode(
11415 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11416 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11417 }
11418 QuietMask = 0x80000;
11419 } else if (VT == MVT::f32) {
11420 HighWord = DAG.getBitcast(MVT::i32, Op);
11421 QuietMask = 0x400000;
11422 }
11423 SDValue NanRes = DAG.getSetCC(
11424 Dl, MVT::i1,
11425 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11426 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11427 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11428 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11429 if (Mask == fcQNan || Mask == fcSNan)
11430 return NanRes;
11431
11432 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11433 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11434 NanRes);
11435 }
11436
11437 unsigned NativeMask = 0;
11438 if ((Mask & fcNan) == fcNan)
11439 NativeMask |= DC_NAN;
11440 if (Mask & fcNegInf)
11441 NativeMask |= DC_NEG_INF;
11442 if (Mask & fcPosInf)
11443 NativeMask |= DC_POS_INF;
11444 if (Mask & fcNegZero)
11445 NativeMask |= DC_NEG_ZERO;
11446 if (Mask & fcPosZero)
11447 NativeMask |= DC_POS_ZERO;
11448 if (Mask & fcNegSubnormal)
11449 NativeMask |= DC_NEG_SUBNORM;
11450 if (Mask & fcPosSubnormal)
11451 NativeMask |= DC_POS_SUBNORM;
11452 return SDValue(
11453 DAG.getMachineNode(
11454           TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11455           SDValue(DAG.getMachineNode(
11456 TestOp, Dl, MVT::i32,
11457 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11458 0),
11459 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11460 0);
11461}
11462
11463SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11464 SelectionDAG &DAG) const {
11465 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11466 SDValue LHS = Op.getOperand(0);
11467 uint64_t RHSC = Op.getConstantOperandVal(1);
11468 SDLoc Dl(Op);
11469 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11470 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11471}
11472
11473SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11474 SelectionDAG &DAG) const {
11475 SDLoc dl(Op);
11476   // Create a stack slot that is 16-byte aligned.
11477   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11478 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11479 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11480 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11481
11482 // Store the input value into Value#0 of the stack slot.
11483   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
11484                                MachinePointerInfo());
11485 // Load it out.
11486 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11487}
11488
11489SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11490 SelectionDAG &DAG) const {
11491 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11492 "Should only be called for ISD::INSERT_VECTOR_ELT");
11493
11494 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11495
11496 EVT VT = Op.getValueType();
11497 SDLoc dl(Op);
11498 SDValue V1 = Op.getOperand(0);
11499 SDValue V2 = Op.getOperand(1);
11500
11501 if (VT == MVT::v2f64 && C)
11502 return Op;
11503
11504 if (Subtarget.hasP9Vector()) {
11505     // An f32 load feeding into a v4f32 insert_vector_elt is handled this way
11506 // because on P10, it allows this specific insert_vector_elt load pattern to
11507 // utilize the refactored load and store infrastructure in order to exploit
11508 // prefixed loads.
11509 // On targets with inexpensive direct moves (Power9 and up), a
11510 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11511 // load since a single precision load will involve conversion to double
11512 // precision on the load followed by another conversion to single precision.
11513 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11514 (isa<LoadSDNode>(V2))) {
11515 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11516 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11517 SDValue InsVecElt =
11518 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11519 BitcastLoad, Op.getOperand(2));
11520 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11521 }
11522 }
11523
11524 if (Subtarget.isISA3_1()) {
11525 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11526 return SDValue();
11527 // On P10, we have legal lowering for constant and variable indices for
11528 // all vectors.
11529 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11530 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11531 return Op;
11532 }
11533
11534 // Before P10, we have legal lowering for constant indices but not for
11535 // variable ones.
11536 if (!C)
11537 return SDValue();
11538
11539 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11540 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11541 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11542 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11543 unsigned InsertAtElement = C->getZExtValue();
11544 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11545 if (Subtarget.isLittleEndian()) {
11546 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11547 }
11548 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11549 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11550 }
11551 return Op;
11552}
11553
11554SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11555 SelectionDAG &DAG) const {
11556 SDLoc dl(Op);
11557 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11558 SDValue LoadChain = LN->getChain();
11559 SDValue BasePtr = LN->getBasePtr();
11560 EVT VT = Op.getValueType();
11561
11562 if (VT != MVT::v256i1 && VT != MVT::v512i1)
11563 return Op;
11564
11565 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11566 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11567 // 2 or 4 vsx registers.
11568 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11569 "Type unsupported without MMA");
11570 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11571 "Type unsupported without paired vector support");
11572   Align Alignment = LN->getAlign();
11573   SmallVector<SDValue, 4> Loads;
11574 SmallVector<SDValue, 4> LoadChains;
11575 unsigned NumVecs = VT.getSizeInBits() / 128;
11576 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11577 SDValue Load =
11578 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11579 LN->getPointerInfo().getWithOffset(Idx * 16),
11580 commonAlignment(Alignment, Idx * 16),
11581 LN->getMemOperand()->getFlags(), LN->getAAInfo());
11582 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11583 DAG.getConstant(16, dl, BasePtr.getValueType()));
11584 Loads.push_back(Load);
11585 LoadChains.push_back(Load.getValue(1));
11586 }
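  // The constituent vectors are numbered in the opposite order on
  // little-endian targets, so reverse the loads (and their chains) before
  // building the pair/accumulator.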
11587 if (Subtarget.isLittleEndian()) {
11588 std::reverse(Loads.begin(), Loads.end());
11589 std::reverse(LoadChains.begin(), LoadChains.end());
11590 }
11591 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11592 SDValue Value =
11593 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11594 dl, VT, Loads);
11595 SDValue RetOps[] = {Value, TF};
11596 return DAG.getMergeValues(RetOps, dl);
11597}
11598
11599SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11600 SelectionDAG &DAG) const {
11601 SDLoc dl(Op);
11602 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11603 SDValue StoreChain = SN->getChain();
11604 SDValue BasePtr = SN->getBasePtr();
11605 SDValue Value = SN->getValue();
11606 SDValue Value2 = SN->getValue();
11607 EVT StoreVT = Value.getValueType();
11608
11609 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11610 return Op;
11611
11612 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11613 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11614 // underlying registers individually.
11615 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11616 "Type unsupported without MMA");
11617 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11618 "Type unsupported without paired vector support");
11619   Align Alignment = SN->getAlign();
11620   SmallVector<SDValue, 4> Stores;
11621 unsigned NumVecs = 2;
11622 if (StoreVT == MVT::v512i1) {
11623 if (Subtarget.isISAFuture()) {
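      // On ISA-Future the accumulator contents must first be extracted with
      // DMXXEXTFDMR512 into two v256i1 halves before they can be stored.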
11624 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11625 MachineSDNode *ExtNode = DAG.getMachineNode(
11626 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
11627
11628 Value = SDValue(ExtNode, 0);
11629 Value2 = SDValue(ExtNode, 1);
11630 } else
11631 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11632 NumVecs = 4;
11633 }
11634 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11635 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11636 SDValue Elt;
11637 if (Subtarget.isISAFuture()) {
11638 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11639 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11640 Idx > 1 ? Value2 : Value,
11641 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11642 } else
11643 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11644 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11645
11646 SDValue Store =
11647 DAG.getStore(StoreChain, dl, Elt, BasePtr,
11648 SN->getPointerInfo().getWithOffset(Idx * 16),
11649 commonAlignment(Alignment, Idx * 16),
11650 SN->getMemOperand()->getFlags(), SN->getAAInfo());
11651 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11652 DAG.getConstant(16, dl, BasePtr.getValueType()));
11653 Stores.push_back(Store);
11654 }
11655 SDValue TF = DAG.getTokenFactor(dl, Stores);
11656 return TF;
11657}
11658
11659SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11660 SDLoc dl(Op);
11661 if (Op.getValueType() == MVT::v4i32) {
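    // Without a 32-bit vector multiply instruction, synthesize one from 16-bit
    // multiplies: the low halves via vmulouh, and the two cross products via
    // vmsumuhm on a rotated RHS, shifted up into the high half.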
11662 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11663
11664 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11665 // +16 as shift amt.
11666 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11667 SDValue RHSSwap = // = vrlw RHS, 16
11668 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11669
11670 // Shrinkify inputs to v8i16.
11671 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11672 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11673 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11674
11675 // Low parts multiplied together, generating 32-bit results (we ignore the
11676 // top parts).
11677 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11678 LHS, RHS, DAG, dl, MVT::v4i32);
11679
11680 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11681 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11682 // Shift the high parts up 16 bits.
11683 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11684 Neg16, DAG, dl);
11685 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11686 } else if (Op.getValueType() == MVT::v16i8) {
11687 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11688 bool isLittleEndian = Subtarget.isLittleEndian();
11689
11690 // Multiply the even 8-bit parts, producing 16-bit sums.
11691 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11692 LHS, RHS, DAG, dl, MVT::v8i16);
11693 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11694
11695 // Multiply the odd 8-bit parts, producing 16-bit sums.
11696 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11697 LHS, RHS, DAG, dl, MVT::v8i16);
11698 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11699
11700 // Merge the results together. Because vmuleub and vmuloub are
11701 // instructions with a big-endian bias, we must reverse the
11702 // element numbering and reverse the meaning of "odd" and "even"
11703 // when generating little endian code.
11704 int Ops[16];
11705 for (unsigned i = 0; i != 8; ++i) {
11706 if (isLittleEndian) {
11707 Ops[i*2 ] = 2*i;
11708 Ops[i*2+1] = 2*i+16;
11709 } else {
11710 Ops[i*2 ] = 2*i+1;
11711 Ops[i*2+1] = 2*i+1+16;
11712 }
11713 }
11714 if (isLittleEndian)
11715 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11716 else
11717 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11718 } else {
11719 llvm_unreachable("Unknown mul to lower!");
11720 }
11721}
11722
11723SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11724 bool IsStrict = Op->isStrictFPOpcode();
11725 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11726 !Subtarget.hasP9Vector())
11727 return SDValue();
11728
11729 return Op;
11730}
11731
11732 // Custom lowering for fpext v2f32 to v2f64
11733SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11734
11735 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11736 "Should only be called for ISD::FP_EXTEND");
11737
11738 // FIXME: handle extends from half precision float vectors on P9.
11739 // We only want to custom lower an extend from v2f32 to v2f64.
11740 if (Op.getValueType() != MVT::v2f64 ||
11741 Op.getOperand(0).getValueType() != MVT::v2f32)
11742 return SDValue();
11743
11744 SDLoc dl(Op);
11745 SDValue Op0 = Op.getOperand(0);
11746
11747 switch (Op0.getOpcode()) {
11748 default:
11749     return SDValue();
11750   case ISD::EXTRACT_SUBVECTOR: {
11751 assert(Op0.getNumOperands() == 2 &&
11752 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11753 "Node should have 2 operands with second one being a constant!");
11754
11755 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11756 return SDValue();
11757
11758     // Custom lowering is only done for the high or low doubleword.
11759 int Idx = Op0.getConstantOperandVal(1);
11760 if (Idx % 2 != 0)
11761 return SDValue();
11762
11763 // Since input is v4f32, at this point Idx is either 0 or 2.
11764 // Shift to get the doubleword position we want.
11765 int DWord = Idx >> 1;
11766
11767 // High and low word positions are different on little endian.
11768 if (Subtarget.isLittleEndian())
11769 DWord ^= 0x1;
11770
11771 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11772 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11773 }
11774 case ISD::FADD:
11775 case ISD::FMUL:
11776 case ISD::FSUB: {
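    // Rebuild the operation on v4f32 vectors formed from half-vector loads
    // (LD_VSX_LH) of both operands, then extend doubleword 0 of the result.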
11777 SDValue NewLoad[2];
11778 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11779       // Ensure both inputs are loads.
11780 SDValue LdOp = Op0.getOperand(i);
11781 if (LdOp.getOpcode() != ISD::LOAD)
11782 return SDValue();
11783 // Generate new load node.
11784 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11785 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11786 NewLoad[i] = DAG.getMemIntrinsicNode(
11787 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11788 LD->getMemoryVT(), LD->getMemOperand());
11789 }
11790 SDValue NewOp =
11791 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11792 NewLoad[1], Op0.getNode()->getFlags());
11793 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11794 DAG.getConstant(0, dl, MVT::i32));
11795 }
11796 case ISD::LOAD: {
11797 LoadSDNode *LD = cast<LoadSDNode>(Op0);
11798 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11799 SDValue NewLd = DAG.getMemIntrinsicNode(
11800 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11801 LD->getMemoryVT(), LD->getMemOperand());
11802 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11803 DAG.getConstant(0, dl, MVT::i32));
11804 }
11805 }
11806   llvm_unreachable("ERROR: Should return for all cases within switch.");
11807}
11808
11809/// LowerOperation - Provide custom lowering hooks for some operations.
11810 ///
11811 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11812 switch (Op.getOpcode()) {
11813 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11814 case ISD::FPOW: return lowerPow(Op, DAG);
11815 case ISD::FSIN: return lowerSin(Op, DAG);
11816 case ISD::FCOS: return lowerCos(Op, DAG);
11817 case ISD::FLOG: return lowerLog(Op, DAG);
11818 case ISD::FLOG10: return lowerLog10(Op, DAG);
11819 case ISD::FEXP: return lowerExp(Op, DAG);
11820 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11821 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11822 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11823 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11824 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11825   case ISD::STRICT_FSETCC:
11826   case ISD::STRICT_FSETCCS:
11827 case ISD::SETCC: return LowerSETCC(Op, DAG);
11828 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11829 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11830
11831 case ISD::INLINEASM:
11832 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11833 // Variable argument lowering.
11834 case ISD::VASTART: return LowerVASTART(Op, DAG);
11835 case ISD::VAARG: return LowerVAARG(Op, DAG);
11836 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11837
11838 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11839   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11840   case ISD::GET_DYNAMIC_AREA_OFFSET:
11841 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11842
11843 // Exception handling lowering.
11844 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11845 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11846 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11847
11848 case ISD::LOAD: return LowerLOAD(Op, DAG);
11849 case ISD::STORE: return LowerSTORE(Op, DAG);
11850 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11851   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
11852   case ISD::STRICT_FP_TO_UINT:
11853   case ISD::STRICT_FP_TO_SINT:
11854 case ISD::FP_TO_UINT:
11855   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11856   case ISD::STRICT_UINT_TO_FP:
11857   case ISD::STRICT_SINT_TO_FP:
11858 case ISD::UINT_TO_FP:
11859 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11860 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
11861
11862 // Lower 64-bit shifts.
11863 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11864 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11865 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11866
11867 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11868 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11869
11870 // Vector-related lowering.
11871 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11872 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11873 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11874 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11875 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11876 case ISD::MUL: return LowerMUL(Op, DAG);
11877   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
11878   case ISD::STRICT_FP_ROUND:
11879 case ISD::FP_ROUND:
11880 return LowerFP_ROUND(Op, DAG);
11881 case ISD::ROTL: return LowerROTL(Op, DAG);
11882
11883 // For counter-based loop handling.
11884 case ISD::INTRINSIC_W_CHAIN: return SDValue();
11885
11886 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11887
11888 // Frame & Return address.
11889 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11890 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11891 
11892   case ISD::INTRINSIC_VOID:
11893 return LowerINTRINSIC_VOID(Op, DAG);
11894 case ISD::BSWAP:
11895     return LowerBSWAP(Op, DAG);
11896   case ISD::ATOMIC_CMP_SWAP:
11897 return LowerATOMIC_CMP_SWAP(Op, DAG);
11898 case ISD::ATOMIC_STORE:
11899 return LowerATOMIC_LOAD_STORE(Op, DAG);
11900 case ISD::IS_FPCLASS:
11901 return LowerIS_FPCLASS(Op, DAG);
11902 }
11903}
11904 
11905 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11906                                            SmallVectorImpl<SDValue> &Results,
11907 SelectionDAG &DAG) const {
11908 SDLoc dl(N);
11909 switch (N->getOpcode()) {
11910 default:
11911 llvm_unreachable("Do not know how to custom type legalize this operation!");
11912 case ISD::ATOMIC_LOAD: {
11913 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11914 Results.push_back(Res);
11915 Results.push_back(Res.getValue(1));
11916 break;
11917 }
11918 case ISD::READCYCLECOUNTER: {
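    // READ_TIME_BASE returns the time base as two i32 halves; pair them back
    // into the i64 result the caller expects.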
11919 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11920 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11921
11922 Results.push_back(
11923 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11924 Results.push_back(RTB.getValue(2));
11925 break;
11926   }
11927   case ISD::INTRINSIC_W_CHAIN: {
11928 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
11929 break;
11930
11931 assert(N->getValueType(0) == MVT::i1 &&
11932 "Unexpected result type for CTR decrement intrinsic");
11933 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11934 N->getValueType(0));
11935 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11936 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11937 N->getOperand(1));
11938
11939 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11940 Results.push_back(NewInt.getValue(1));
11941 break;
11942   }
11943   case ISD::INTRINSIC_WO_CHAIN: {
11944 switch (N->getConstantOperandVal(0)) {
11945 case Intrinsic::ppc_pack_longdouble:
11946 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11947 N->getOperand(2), N->getOperand(1)));
11948 break;
11949 case Intrinsic::ppc_maxfe:
11950 case Intrinsic::ppc_minfe:
11951 case Intrinsic::ppc_fnmsub:
11952 case Intrinsic::ppc_convert_f128_to_ppcf128:
11953 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11954 break;
11955 }
11956 break;
11957 }
11958 case ISD::VAARG: {
11959 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11960 return;
11961
11962 EVT VT = N->getValueType(0);
11963
11964 if (VT == MVT::i64) {
11965 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11966
11967 Results.push_back(NewNode);
11968 Results.push_back(NewNode.getValue(1));
11969 }
11970 return;
11971   }
11972   case ISD::STRICT_FP_TO_SINT:
11973   case ISD::STRICT_FP_TO_UINT:
11974 case ISD::FP_TO_SINT:
11975 case ISD::FP_TO_UINT: {
11976 // LowerFP_TO_INT() can only handle f32 and f64.
11977 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11978 MVT::ppcf128)
11979 return;
11980 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11981 Results.push_back(LoweredValue);
11982 if (N->isStrictFPOpcode())
11983 Results.push_back(LoweredValue.getValue(1));
11984 return;
11985 }
11986 case ISD::TRUNCATE: {
11987 if (!N->getValueType(0).isVector())
11988 return;
11989 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11990 if (Lowered)
11991 Results.push_back(Lowered);
11992 return;
11993 }
11994 case ISD::FSHL:
11995 case ISD::FSHR:
11996 // Don't handle funnel shifts here.
11997 return;
11998 case ISD::BITCAST:
11999 // Don't handle bitcast here.
12000 return;
12001 case ISD::FP_EXTEND:
12002 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12003 if (Lowered)
12004 Results.push_back(Lowered);
12005 return;
12006 }
12007}
12008
12009//===----------------------------------------------------------------------===//
12010// Other Lowering Code
12011//===----------------------------------------------------------------------===//
12012 
12013 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12014 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
12015 Function *Func = Intrinsic::getDeclaration(M, Id);
12016 return Builder.CreateCall(Func, {});
12017}
12018
12019 // The mappings for emitLeading/TrailingFence are taken from
12020 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12021 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12022 Instruction *Inst,
12023                                                  AtomicOrdering Ord) const {
12024   if (Ord == AtomicOrdering::SequentiallyConsistent)
12025 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12026 if (isReleaseOrStronger(Ord))
12027 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12028 return nullptr;
12029}
12030 
12031 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12032 Instruction *Inst,
12033 AtomicOrdering Ord) const {
12034 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12035 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12036 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12037 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12038 if (isa<LoadInst>(Inst))
12039       return Builder.CreateCall(
12040           Intrinsic::getDeclaration(
12041 Builder.GetInsertBlock()->getParent()->getParent(),
12042 Intrinsic::ppc_cfence, {Inst->getType()}),
12043 {Inst});
12044 // FIXME: Can use isync for rmw operation.
12045 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12046 }
12047 return nullptr;
12048}
12049 
12050 MachineBasicBlock *
12051 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12052 unsigned AtomicSize,
12053 unsigned BinOpcode,
12054 unsigned CmpOpcode,
12055 unsigned CmpPred) const {
12056 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12057 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12058
12059 auto LoadMnemonic = PPC::LDARX;
12060 auto StoreMnemonic = PPC::STDCX;
12061 switch (AtomicSize) {
12062 default:
12063 llvm_unreachable("Unexpected size of atomic entity");
12064 case 1:
12065 LoadMnemonic = PPC::LBARX;
12066 StoreMnemonic = PPC::STBCX;
12067 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12068 break;
12069 case 2:
12070 LoadMnemonic = PPC::LHARX;
12071 StoreMnemonic = PPC::STHCX;
12072 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12073 break;
12074 case 4:
12075 LoadMnemonic = PPC::LWARX;
12076 StoreMnemonic = PPC::STWCX;
12077 break;
12078 case 8:
12079 LoadMnemonic = PPC::LDARX;
12080 StoreMnemonic = PPC::STDCX;
12081 break;
12082 }
12083
12084 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12085   MachineFunction *F = BB->getParent();
12086   MachineFunction::iterator It = ++BB->getIterator();
12087
12088 Register dest = MI.getOperand(0).getReg();
12089 Register ptrA = MI.getOperand(1).getReg();
12090 Register ptrB = MI.getOperand(2).getReg();
12091 Register incr = MI.getOperand(3).getReg();
12092 DebugLoc dl = MI.getDebugLoc();
12093
12094 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12095 MachineBasicBlock *loop2MBB =
12096 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12097 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12098 F->insert(It, loopMBB);
12099 if (CmpOpcode)
12100 F->insert(It, loop2MBB);
12101 F->insert(It, exitMBB);
12102 exitMBB->splice(exitMBB->begin(), BB,
12103                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
12104   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12105
12106 MachineRegisterInfo &RegInfo = F->getRegInfo();
12107 Register TmpReg = (!BinOpcode) ? incr :
12108 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12109 : &PPC::GPRCRegClass);
12110
12111 // thisMBB:
12112 // ...
12113 // fallthrough --> loopMBB
12114 BB->addSuccessor(loopMBB);
12115
12116 // loopMBB:
12117 // l[wd]arx dest, ptr
12118 // add r0, dest, incr
12119 // st[wd]cx. r0, ptr
12120 // bne- loopMBB
12121 // fallthrough --> exitMBB
12122
12123 // For max/min...
12124 // loopMBB:
12125 // l[wd]arx dest, ptr
12126 // cmpl?[wd] dest, incr
12127 // bgt exitMBB
12128 // loop2MBB:
12129 // st[wd]cx. dest, ptr
12130 // bne- loopMBB
12131 // fallthrough --> exitMBB
12132
12133 BB = loopMBB;
12134 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12135 .addReg(ptrA).addReg(ptrB);
12136 if (BinOpcode)
12137 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12138 if (CmpOpcode) {
12139 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12140 // Signed comparisons of byte or halfword values must be sign-extended.
12141 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12142 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12143 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12144 ExtReg).addReg(dest);
12145 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12146 } else
12147 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12148
12149 BuildMI(BB, dl, TII->get(PPC::BCC))
12150 .addImm(CmpPred)
12151 .addReg(CrReg)
12152 .addMBB(exitMBB);
12153 BB->addSuccessor(loop2MBB);
12154 BB->addSuccessor(exitMBB);
12155 BB = loop2MBB;
12156 }
12157 BuildMI(BB, dl, TII->get(StoreMnemonic))
12158 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12159 BuildMI(BB, dl, TII->get(PPC::BCC))
12160 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12161 BB->addSuccessor(loopMBB);
12162 BB->addSuccessor(exitMBB);
12163
12164 // exitMBB:
12165 // ...
12166 BB = exitMBB;
12167 return BB;
12168}
12169 
12170 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12171 switch(MI.getOpcode()) {
12172 default:
12173 return false;
12174 case PPC::COPY:
12175 return TII->isSignExtended(MI.getOperand(1).getReg(),
12176 &MI.getMF()->getRegInfo());
12177 case PPC::LHA:
12178 case PPC::LHA8:
12179 case PPC::LHAU:
12180 case PPC::LHAU8:
12181 case PPC::LHAUX:
12182 case PPC::LHAUX8:
12183 case PPC::LHAX:
12184 case PPC::LHAX8:
12185 case PPC::LWA:
12186 case PPC::LWAUX:
12187 case PPC::LWAX:
12188 case PPC::LWAX_32:
12189 case PPC::LWA_32:
12190 case PPC::PLHA:
12191 case PPC::PLHA8:
12192 case PPC::PLHA8pc:
12193 case PPC::PLHApc:
12194 case PPC::PLWA:
12195 case PPC::PLWA8:
12196 case PPC::PLWA8pc:
12197 case PPC::PLWApc:
12198 case PPC::EXTSB:
12199 case PPC::EXTSB8:
12200 case PPC::EXTSB8_32_64:
12201 case PPC::EXTSB8_rec:
12202 case PPC::EXTSB_rec:
12203 case PPC::EXTSH:
12204 case PPC::EXTSH8:
12205 case PPC::EXTSH8_32_64:
12206 case PPC::EXTSH8_rec:
12207 case PPC::EXTSH_rec:
12208 case PPC::EXTSW:
12209 case PPC::EXTSWSLI:
12210 case PPC::EXTSWSLI_32_64:
12211 case PPC::EXTSWSLI_32_64_rec:
12212 case PPC::EXTSWSLI_rec:
12213 case PPC::EXTSW_32:
12214 case PPC::EXTSW_32_64:
12215 case PPC::EXTSW_32_64_rec:
12216 case PPC::EXTSW_rec:
12217 case PPC::SRAW:
12218 case PPC::SRAWI:
12219 case PPC::SRAWI_rec:
12220 case PPC::SRAW_rec:
12221 return true;
12222 }
12223 return false;
12224}
12225 
12226 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12227     MachineInstr &MI, MachineBasicBlock *BB,
12228 bool is8bit, // operation
12229 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12230 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12231 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12232
12233 // If this is a signed comparison and the value being compared is not known
12234 // to be sign extended, sign extend it here.
12235 DebugLoc dl = MI.getDebugLoc();
12236 MachineFunction *F = BB->getParent();
12237 MachineRegisterInfo &RegInfo = F->getRegInfo();
12238 Register incr = MI.getOperand(3).getReg();
12239 bool IsSignExtended =
12240 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12241
12242 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12243 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12244 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12245 .addReg(MI.getOperand(3).getReg());
12246 MI.getOperand(3).setReg(ValueReg);
12247 incr = ValueReg;
12248 }
12249 // If we support part-word atomic mnemonics, just use them
12250 if (Subtarget.hasPartwordAtomics())
12251 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12252 CmpPred);
12253
12254 // In 64 bit mode we have to use 64 bits for addresses, even though the
12255 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
12256 // registers without caring whether they're 32 or 64, but here we're
12257 // doing actual arithmetic on the addresses.
12258 bool is64bit = Subtarget.isPPC64();
12259 bool isLittleEndian = Subtarget.isLittleEndian();
12260 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12261
12262   const BasicBlock *LLVM_BB = BB->getBasicBlock();
12263   MachineFunction::iterator It = ++BB->getIterator();
12264
12265 Register dest = MI.getOperand(0).getReg();
12266 Register ptrA = MI.getOperand(1).getReg();
12267 Register ptrB = MI.getOperand(2).getReg();
12268
12269 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12270 MachineBasicBlock *loop2MBB =
12271 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12272 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12273 F->insert(It, loopMBB);
12274 if (CmpOpcode)
12275 F->insert(It, loop2MBB);
12276 F->insert(It, exitMBB);
12277 exitMBB->splice(exitMBB->begin(), BB,
12278                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
12279   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12280
12281 const TargetRegisterClass *RC =
12282 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12283 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12284
12285 Register PtrReg = RegInfo.createVirtualRegister(RC);
12286 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12287 Register ShiftReg =
12288 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12289 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12290 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12291 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12292 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12293 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12294 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12295 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12296 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12297 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12298 Register Ptr1Reg;
12299 Register TmpReg =
12300 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12301
12302 // thisMBB:
12303 // ...
12304 // fallthrough --> loopMBB
12305 BB->addSuccessor(loopMBB);
12306
12307 // The 4-byte load must be aligned, while a char or short may be
12308 // anywhere in the word. Hence all this nasty bookkeeping code.
12309 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12310 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12311 // xori shift, shift1, 24 [16]
12312 // rlwinm ptr, ptr1, 0, 0, 29
12313 // slw incr2, incr, shift
12314 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12315 // slw mask, mask2, shift
12316 // loopMBB:
12317 // lwarx tmpDest, ptr
12318 // add tmp, tmpDest, incr2
12319 // andc tmp2, tmpDest, mask
12320 // and tmp3, tmp, mask
12321 // or tmp4, tmp3, tmp2
12322 // stwcx. tmp4, ptr
12323 // bne- loopMBB
12324 // fallthrough --> exitMBB
12325 // srw SrwDest, tmpDest, shift
12326 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
12327 if (ptrA != ZeroReg) {
12328 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12329 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12330 .addReg(ptrA)
12331 .addReg(ptrB);
12332 } else {
12333 Ptr1Reg = ptrB;
12334 }
12335   // We need to use a 32-bit subregister here to avoid a register class mismatch
12336   // in 64-bit mode.
12337 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12338 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12339 .addImm(3)
12340 .addImm(27)
12341 .addImm(is8bit ? 28 : 27);
12342 if (!isLittleEndian)
12343 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12344 .addReg(Shift1Reg)
12345 .addImm(is8bit ? 24 : 16);
12346 if (is64bit)
12347 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12348 .addReg(Ptr1Reg)
12349 .addImm(0)
12350 .addImm(61);
12351 else
12352 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12353 .addReg(Ptr1Reg)
12354 .addImm(0)
12355 .addImm(0)
12356 .addImm(29);
12357 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12358 if (is8bit)
12359 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12360 else {
12361 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12362 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12363 .addReg(Mask3Reg)
12364 .addImm(65535);
12365 }
12366 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12367 .addReg(Mask2Reg)
12368 .addReg(ShiftReg);
12369
12370 BB = loopMBB;
12371 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12372 .addReg(ZeroReg)
12373 .addReg(PtrReg);
12374 if (BinOpcode)
12375 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12376 .addReg(Incr2Reg)
12377 .addReg(TmpDestReg);
12378 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12379 .addReg(TmpDestReg)
12380 .addReg(MaskReg);
12381 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12382 if (CmpOpcode) {
12383 // For unsigned comparisons, we can directly compare the shifted values.
12384 // For signed comparisons we shift and sign extend.
12385 Register SReg = RegInfo.createVirtualRegister(GPRC);
12386 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12387 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12388 .addReg(TmpDestReg)
12389 .addReg(MaskReg);
12390 unsigned ValueReg = SReg;
12391 unsigned CmpReg = Incr2Reg;
12392 if (CmpOpcode == PPC::CMPW) {
12393 ValueReg = RegInfo.createVirtualRegister(GPRC);
12394 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12395 .addReg(SReg)
12396 .addReg(ShiftReg);
12397 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12398 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12399 .addReg(ValueReg);
12400 ValueReg = ValueSReg;
12401 CmpReg = incr;
12402 }
12403 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12404 BuildMI(BB, dl, TII->get(PPC::BCC))
12405 .addImm(CmpPred)
12406 .addReg(CrReg)
12407 .addMBB(exitMBB);
12408 BB->addSuccessor(loop2MBB);
12409 BB->addSuccessor(exitMBB);
12410 BB = loop2MBB;
12411 }
12412 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12413 BuildMI(BB, dl, TII->get(PPC::STWCX))
12414 .addReg(Tmp4Reg)
12415 .addReg(ZeroReg)
12416 .addReg(PtrReg);
12417   BuildMI(BB, dl, TII->get(PPC::BCC))
12418       .addImm(PPC::PRED_NE)
12419 .addReg(PPC::CR0)
12420 .addMBB(loopMBB);
12421 BB->addSuccessor(loopMBB);
12422 BB->addSuccessor(exitMBB);
12423
12424 // exitMBB:
12425 // ...
12426 BB = exitMBB;
12427 // Since the shift amount is not a constant, we need to clear
12428 // the upper bits with a separate RLWINM.
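  // Both instructions are emitted at the start of exitMBB; the SRW is built
  // second and inserted at begin(), so it executes before the RLWINM.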
12429 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12430 .addReg(SrwDestReg)
12431 .addImm(0)
12432 .addImm(is8bit ? 24 : 16)
12433 .addImm(31);
12434 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12435 .addReg(TmpDestReg)
12436 .addReg(ShiftReg);
12437 return BB;
12438}
12439 
12440 MachineBasicBlock *
12441 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12442 MachineBasicBlock *MBB) const {
12443 DebugLoc DL = MI.getDebugLoc();
12444 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12445 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12446
12447   MachineFunction *MF = MBB->getParent();
12448   MachineRegisterInfo &MRI = MF->getRegInfo();
12449
12450   const BasicBlock *BB = MBB->getBasicBlock();
12451   MachineFunction::iterator I = ++MBB->getIterator();
12452
12453 Register DstReg = MI.getOperand(0).getReg();
12454 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12455 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12456 Register mainDstReg = MRI.createVirtualRegister(RC);
12457 Register restoreDstReg = MRI.createVirtualRegister(RC);
12458
12459 MVT PVT = getPointerTy(MF->getDataLayout());
12460 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12461 "Invalid Pointer Size!");
12462 // For v = setjmp(buf), we generate
12463 //
12464 // thisMBB:
12465 // SjLjSetup mainMBB
12466 // bl mainMBB
12467 // v_restore = 1
12468 // b sinkMBB
12469 //
12470 // mainMBB:
12471 // buf[LabelOffset] = LR
12472 // v_main = 0
12473 //
12474 // sinkMBB:
12475 // v = phi(main, restore)
12476 //
12477
12478 MachineBasicBlock *thisMBB = MBB;
12479 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12480 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12481 MF->insert(I, mainMBB);
12482 MF->insert(I, sinkMBB);
12483 
12484   MachineInstrBuilder MIB;
12485
12486 // Transfer the remainder of BB and its successor edges to sinkMBB.
12487 sinkMBB->splice(sinkMBB->begin(), MBB,
12488                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12489   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12490
12491 // Note that the structure of the jmp_buf used here is not compatible
12492 // with that used by libc, and is not designed to be. Specifically, it
12493 // stores only those 'reserved' registers that LLVM does not otherwise
12494 // understand how to spill. Also, by convention, by the time this
12495 // intrinsic is called, Clang has already stored the frame address in the
12496 // first slot of the buffer and stack address in the third. Following the
12497 // X86 target code, we'll store the jump address in the second slot. We also
12498 // need to save the TOC pointer (R2) to handle jumps between shared
12499 // libraries, and that will be stored in the fourth slot. The thread
12500 // identifier (R13) is not affected.
12501
12502 // thisMBB:
12503 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12504 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12505 const int64_t BPOffset = 4 * PVT.getStoreSize();
12506
12507   // Prepare the IP (return address) in a register.
12508 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12509 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12510 Register BufReg = MI.getOperand(1).getReg();
12511
12512   if (Subtarget.is64BitELFABI()) {
12513     setUsesTOCBasePtr(*MBB->getParent());
12514 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12515 .addReg(PPC::X2)
12516 .addImm(TOCOffset)
12517 .addReg(BufReg)
12518 .cloneMemRefs(MI);
12519 }
12520
12521 // Naked functions never have a base pointer, and so we use r1. For all
12522   // other functions, this decision must be deferred until PEI.
12523 unsigned BaseReg;
12524 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12525 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12526 else
12527 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12528
12529 MIB = BuildMI(*thisMBB, MI, DL,
12530 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12531 .addReg(BaseReg)
12532 .addImm(BPOffset)
12533 .addReg(BufReg)
12534 .cloneMemRefs(MI);
12535
12536 // Setup
12537 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12538 MIB.addRegMask(TRI->getNoPreservedMask());
12539
12540 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12541
12542 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12543 .addMBB(mainMBB);
12544 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12545
12546 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12547 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12548
12549 // mainMBB:
12550 // mainDstReg = 0
12551 MIB =
12552 BuildMI(mainMBB, DL,
12553 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12554
12555 // Store IP
12556 if (Subtarget.isPPC64()) {
12557 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12558 .addReg(LabelReg)
12559 .addImm(LabelOffset)
12560 .addReg(BufReg);
12561 } else {
12562 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12563 .addReg(LabelReg)
12564 .addImm(LabelOffset)
12565 .addReg(BufReg);
12566 }
12567 MIB.cloneMemRefs(MI);
12568
12569 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12570 mainMBB->addSuccessor(sinkMBB);
12571
12572 // sinkMBB:
12573 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12574 TII->get(PPC::PHI), DstReg)
12575 .addReg(mainDstReg).addMBB(mainMBB)
12576 .addReg(restoreDstReg).addMBB(thisMBB);
12577
12578 MI.eraseFromParent();
12579 return sinkMBB;
12580}
12581 
12582 MachineBasicBlock *
12583 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12584 MachineBasicBlock *MBB) const {
12585 DebugLoc DL = MI.getDebugLoc();
12586 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12587
12588   MachineFunction *MF = MBB->getParent();
12589   MachineRegisterInfo &MRI = MF->getRegInfo();
12590
12591 MVT PVT = getPointerTy(MF->getDataLayout());
12592 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12593 "Invalid Pointer Size!");
12594
12595 const TargetRegisterClass *RC =
12596 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12597 Register Tmp = MRI.createVirtualRegister(RC);
12598 // Since FP is only updated here but NOT referenced, it's treated as a GPR.
12599 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12600 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12601 unsigned BP =
12602 (PVT == MVT::i64)
12603 ? PPC::X30
12604 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12605 : PPC::R30);
12606
12607 MachineInstrBuilder MIB;
12608
12609 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12610 const int64_t SPOffset = 2 * PVT.getStoreSize();
12611 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12612 const int64_t BPOffset = 4 * PVT.getStoreSize();
12613
12614 Register BufReg = MI.getOperand(0).getReg();
12615
12616 // Reload FP (the jumped-to function may not have had a
12617 // frame pointer, and if so, then its r31 will be restored
12618 // as necessary).
12619 if (PVT == MVT::i64) {
12620 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12621 .addImm(0)
12622 .addReg(BufReg);
12623 } else {
12624 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12625 .addImm(0)
12626 .addReg(BufReg);
12627 }
12628 MIB.cloneMemRefs(MI);
12629
12630 // Reload IP
12631 if (PVT == MVT::i64) {
12632 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12633 .addImm(LabelOffset)
12634 .addReg(BufReg);
12635 } else {
12636 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12637 .addImm(LabelOffset)
12638 .addReg(BufReg);
12639 }
12640 MIB.cloneMemRefs(MI);
12641
12642 // Reload SP
12643 if (PVT == MVT::i64) {
12644 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12645 .addImm(SPOffset)
12646 .addReg(BufReg);
12647 } else {
12648 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12649 .addImm(SPOffset)
12650 .addReg(BufReg);
12651 }
12652 MIB.cloneMemRefs(MI);
12653
12654 // Reload BP
12655 if (PVT == MVT::i64) {
12656 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12657 .addImm(BPOffset)
12658 .addReg(BufReg);
12659 } else {
12660 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12661 .addImm(BPOffset)
12662 .addReg(BufReg);
12663 }
12664 MIB.cloneMemRefs(MI);
12665
12666 // Reload TOC
12667 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12669 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12670 .addImm(TOCOffset)
12671 .addReg(BufReg)
12672 .cloneMemRefs(MI);
12673 }
12674
12675 // Jump
12676 BuildMI(*MBB, MI, DL,
12677 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12678 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12679
12680 MI.eraseFromParent();
12681 return MBB;
12682}
12683
12685 // If the function specifically requests inline stack probes, emit them.
12686 if (MF.getFunction().hasFnAttribute("probe-stack"))
12687 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12688 "inline-asm";
12689 return false;
12690}
12691
12693 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12694 unsigned StackAlign = TFI->getStackAlignment();
12695 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12696 "Unexpected stack alignment");
12697 // The default stack probe size is 4096 if the function has no
12698 // stack-probe-size attribute.
12699 const Function &Fn = MF.getFunction();
12700 unsigned StackProbeSize =
12701 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12702 // Round down to the stack alignment.
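 // For example, with a 16-byte stack alignment a requested probe size of 4100
 // rounds down to 4096; anything smaller than the alignment rounds down to 0,
 // in which case the alignment itself is used as the probe size below.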
12703 StackProbeSize &= ~(StackAlign - 1);
12704 return StackProbeSize ? StackProbeSize : StackAlign;
12705}
12706
12707// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12708// into three phases. In the first phase, it uses pseudo instruction
12709// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
12710// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
12711// Finally, it uses pseudo instruction DYNAREAOFFSET to get the future result of
12712// MaxCallFrameSize so that it can calculate the correct data area pointer.
12713MachineBasicBlock *
12714PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12715 MachineBasicBlock *MBB) const {
12716 const bool isPPC64 = Subtarget.isPPC64();
12717 MachineFunction *MF = MBB->getParent();
12718 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12719 DebugLoc DL = MI.getDebugLoc();
12720 const unsigned ProbeSize = getStackProbeSize(*MF);
12721 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12722 MachineRegisterInfo &MRI = MF->getRegInfo();
12723 // The CFG of the probing stack looks as follows:
12724 // +-----+
12725 // | MBB |
12726 // +--+--+
12727 // |
12728 // +----v----+
12729 // +--->+ TestMBB +---+
12730 // | +----+----+ |
12731 // | | |
12732 // | +-----v----+ |
12733 // +---+ BlockMBB | |
12734 // +----------+ |
12735 // |
12736 // +---------+ |
12737 // | TailMBB +<--+
12738 // +---------+
12739 // In MBB, calculate previous frame pointer and final stack pointer.
12740 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12741 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12742 // TailMBB is spliced via \p MI.
12743 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12744 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12745 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12746
12747 MachineFunction::iterator MBBIter = ++MBB->getIterator();
12748 MF->insert(MBBIter, TestMBB);
12749 MF->insert(MBBIter, BlockMBB);
12750 MF->insert(MBBIter, TailMBB);
12751
12752 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12753 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12754
12755 Register DstReg = MI.getOperand(0).getReg();
12756 Register NegSizeReg = MI.getOperand(1).getReg();
12757 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12758 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12759 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12760 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12761
12762 // Since the value of NegSizeReg might be realigned during prologue/epilogue
12763 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
12764 // actual FramePointer and NegSize.
12765 unsigned ProbeOpc;
12766 if (!MRI.hasOneNonDBGUse(NegSizeReg))
12767 ProbeOpc =
12768 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12769 else
12770 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
12771 // and NegSizeReg will be allocated in the same physical register to avoid a
12772 // redundant copy when NegSizeReg has only one use, namely the current MI,
12773 // which will then be replaced by PREPARE_PROBED_ALLOCA.
12774 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12775 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12776 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12777 .addDef(ActualNegSizeReg)
12778 .addReg(NegSizeReg)
12779 .add(MI.getOperand(2))
12780 .add(MI.getOperand(3));
12781
12782 // Calculate final stack pointer, which equals SP + ActualNegSize.
12783 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12784 FinalStackPtr)
12785 .addReg(SPReg)
12786 .addReg(ActualNegSizeReg);
12787
12788 // Materialize a scratch register for update.
12789 int64_t NegProbeSize = -(int64_t)ProbeSize;
12790 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12791 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
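 // If NegProbeSize does not fit in a signed 16-bit immediate (for example, a
 // 64 KiB probe size gives NegProbeSize = -65536), it is materialized with an
 // lis/ori pair below; otherwise a single li suffices.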
12792 if (!isInt<16>(NegProbeSize)) {
12793 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12794 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12795 .addImm(NegProbeSize >> 16);
12796 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12797 ScratchReg)
12798 .addReg(TempReg)
12799 .addImm(NegProbeSize & 0xFFFF);
12800 } else
12801 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12802 .addImm(NegProbeSize);
12803
12804 {
12805 // Probing leading residual part.
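 // The divide/multiply/subtract sequence below computes the residual
 // NegMod = ActualNegSize - (ActualNegSize / NegProbeSize) * NegProbeSize,
 // i.e. the part of the allocation that is not a whole multiple of ProbeSize.
 // It is probed first so that the loop below can advance in ProbeSize steps.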
12806 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12807 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12808 .addReg(ActualNegSizeReg)
12809 .addReg(ScratchReg);
12810 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12811 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12812 .addReg(Div)
12813 .addReg(ScratchReg);
12814 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12815 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12816 .addReg(Mul)
12817 .addReg(ActualNegSizeReg);
12818 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12819 .addReg(FramePointer)
12820 .addReg(SPReg)
12821 .addReg(NegMod);
12822 }
12823
12824 {
12825 // The remaining part should be a multiple of ProbeSize.
12826 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12827 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12828 .addReg(SPReg)
12829 .addReg(FinalStackPtr);
12830 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12831 .addImm(PPC::PRED_EQ)
12832 .addReg(CmpResult)
12833 .addMBB(TailMBB);
12834 TestMBB->addSuccessor(BlockMBB);
12835 TestMBB->addSuccessor(TailMBB);
12836 }
12837
12838 {
12839 // Touch the block.
12840 // |P...|P...|P...
12841 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12842 .addReg(FramePointer)
12843 .addReg(SPReg)
12844 .addReg(ScratchReg);
12845 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12846 BlockMBB->addSuccessor(TestMBB);
12847 }
12848
12849 // The calculation of MaxCallFrameSize is deferred to prologue/epilogue
12850 // insertion, so use the DYNAREAOFFSET pseudo instruction to get the future result.
12851 Register MaxCallFrameSizeReg =
12852 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12853 BuildMI(TailMBB, DL,
12854 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12855 MaxCallFrameSizeReg)
12856 .add(MI.getOperand(2))
12857 .add(MI.getOperand(3));
12858 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12859 .addReg(SPReg)
12860 .addReg(MaxCallFrameSizeReg);
12861
12862 // Splice instructions after MI to TailMBB.
12863 TailMBB->splice(TailMBB->end(), MBB,
12864 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12866 MBB->addSuccessor(TestMBB);
12867
12868 // Delete the pseudo instruction.
12869 MI.eraseFromParent();
12870
12871 ++NumDynamicAllocaProbed;
12872 return TailMBB;
12873}
12874
12875static bool IsSelectCC(MachineInstr &MI) {
12876 switch (MI.getOpcode()) {
12877 case PPC::SELECT_CC_I4:
12878 case PPC::SELECT_CC_I8:
12879 case PPC::SELECT_CC_F4:
12880 case PPC::SELECT_CC_F8:
12881 case PPC::SELECT_CC_F16:
12882 case PPC::SELECT_CC_VRRC:
12883 case PPC::SELECT_CC_VSFRC:
12884 case PPC::SELECT_CC_VSSRC:
12885 case PPC::SELECT_CC_VSRC:
12886 case PPC::SELECT_CC_SPE4:
12887 case PPC::SELECT_CC_SPE:
12888 return true;
12889 default:
12890 return false;
12891 }
12892}
12893
12894static bool IsSelect(MachineInstr &MI) {
12895 switch (MI.getOpcode()) {
12896 case PPC::SELECT_I4:
12897 case PPC::SELECT_I8:
12898 case PPC::SELECT_F4:
12899 case PPC::SELECT_F8:
12900 case PPC::SELECT_F16:
12901 case PPC::SELECT_SPE:
12902 case PPC::SELECT_SPE4:
12903 case PPC::SELECT_VRRC:
12904 case PPC::SELECT_VSFRC:
12905 case PPC::SELECT_VSSRC:
12906 case PPC::SELECT_VSRC:
12907 return true;
12908 default:
12909 return false;
12910 }
12911}
12912
12913MachineBasicBlock *
12914PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12915 MachineBasicBlock *BB) const {
12916 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12917 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12918 if (Subtarget.is64BitELFABI() &&
12919 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12920 !Subtarget.isUsingPCRelativeCalls()) {
12921 // Call lowering should have added an r2 operand to indicate a dependence
12922 // on the TOC base pointer value. It can't, however, because there is no
12923 // way to mark the dependence as implicit there, and so the stackmap code
12924 // will confuse it with a regular operand. Instead, add the dependence
12925 // here.
12926 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12927 }
12928
12929 return emitPatchPoint(MI, BB);
12930 }
12931
12932 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12933 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12934 return emitEHSjLjSetJmp(MI, BB);
12935 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12936 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12937 return emitEHSjLjLongJmp(MI, BB);
12938 }
12939
12940 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12941
12942 // To "insert" these instructions we actually have to insert their
12943 // control-flow patterns.
12944 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12945 MachineFunction::iterator It = ++BB->getIterator();
12946
12947 MachineFunction *F = BB->getParent();
12948 MachineRegisterInfo &MRI = F->getRegInfo();
12949
12950 if (Subtarget.hasISEL() &&
12951 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12952 MI.getOpcode() == PPC::SELECT_CC_I8 ||
12953 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
12954 SmallVector<MachineOperand, 2> Cond;
12955 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12956 MI.getOpcode() == PPC::SELECT_CC_I8)
12957 Cond.push_back(MI.getOperand(4));
12958 else
12959 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12960 Cond.push_back(MI.getOperand(1));
12961
12962 DebugLoc dl = MI.getDebugLoc();
12963 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12964 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12965 } else if (IsSelectCC(MI) || IsSelect(MI)) {
12966 // The incoming instruction knows the destination vreg to set, the
12967 // condition code register to branch on, the true/false values to
12968 // select between, and a branch opcode to use.
12969
12970 // thisMBB:
12971 // ...
12972 // TrueVal = ...
12973 // cmpTY ccX, r1, r2
12974 // bCC sinkMBB
12975 // fallthrough --> copy0MBB
12976 MachineBasicBlock *thisMBB = BB;
12977 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12978 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12979 DebugLoc dl = MI.getDebugLoc();
12980 F->insert(It, copy0MBB);
12981 F->insert(It, sinkMBB);
12982
12983 // Set the call frame size on entry to the new basic blocks.
12984 // See https://reviews.llvm.org/D156113.
12985 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12986 copy0MBB->setCallFrameSize(CallFrameSize);
12987 sinkMBB->setCallFrameSize(CallFrameSize);
12988
12989 // Transfer the remainder of BB and its successor edges to sinkMBB.
12990 sinkMBB->splice(sinkMBB->begin(), BB,
12991 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12993
12994 // Next, add the true and fallthrough blocks as its successors.
12995 BB->addSuccessor(copy0MBB);
12996 BB->addSuccessor(sinkMBB);
12997
12998 if (IsSelect(MI)) {
12999 BuildMI(BB, dl, TII->get(PPC::BC))
13000 .addReg(MI.getOperand(1).getReg())
13001 .addMBB(sinkMBB);
13002 } else {
13003 unsigned SelectPred = MI.getOperand(4).getImm();
13004 BuildMI(BB, dl, TII->get(PPC::BCC))
13005 .addImm(SelectPred)
13006 .addReg(MI.getOperand(1).getReg())
13007 .addMBB(sinkMBB);
13008 }
13009
13010 // copy0MBB:
13011 // %FalseValue = ...
13012 // # fallthrough to sinkMBB
13013 BB = copy0MBB;
13014
13015 // Update machine-CFG edges
13016 BB->addSuccessor(sinkMBB);
13017
13018 // sinkMBB:
13019 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13020 // ...
13021 BB = sinkMBB;
13022 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13023 .addReg(MI.getOperand(3).getReg())
13024 .addMBB(copy0MBB)
13025 .addReg(MI.getOperand(2).getReg())
13026 .addMBB(thisMBB);
13027 } else if (MI.getOpcode() == PPC::ReadTB) {
13028 // To read the 64-bit time-base register on a 32-bit target, we read the
13029 // two halves. Should the counter have wrapped while it was being read, we
13030 // need to try again.
13031 // ...
13032 // readLoop:
13033 // mfspr Rx,TBU # load from TBU
13034 // mfspr Ry,TB # load from TB
13035 // mfspr Rz,TBU # load from TBU
13036 // cmpw crX,Rx,Rz # check if 'old'='new'
13037 // bne readLoop # branch if they're not equal
13038 // ...
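 // SPR 269 is TBU and SPR 268 is TB, which the mfspr reads below use.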
13039
13040 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13041 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13042 DebugLoc dl = MI.getDebugLoc();
13043 F->insert(It, readMBB);
13044 F->insert(It, sinkMBB);
13045
13046 // Transfer the remainder of BB and its successor edges to sinkMBB.
13047 sinkMBB->splice(sinkMBB->begin(), BB,
13048 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13050
13051 BB->addSuccessor(readMBB);
13052 BB = readMBB;
13053
13054 MachineRegisterInfo &RegInfo = F->getRegInfo();
13055 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13056 Register LoReg = MI.getOperand(0).getReg();
13057 Register HiReg = MI.getOperand(1).getReg();
13058
13059 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13060 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13061 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13062
13063 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13064
13065 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13066 .addReg(HiReg)
13067 .addReg(ReadAgainReg);
13068 BuildMI(BB, dl, TII->get(PPC::BCC))
13069 .addImm(PPC::PRED_NE)
13070 .addReg(CmpReg)
13071 .addMBB(readMBB);
13072
13073 BB->addSuccessor(readMBB);
13074 BB->addSuccessor(sinkMBB);
13075 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13076 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13077 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13078 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13079 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13080 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13081 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13082 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13083
13084 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13085 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13086 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13087 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13088 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13089 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13090 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13091 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13092
13093 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13094 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13095 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13096 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13097 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13098 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13099 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13100 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13101
13102 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13103 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13104 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13105 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13106 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13107 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13108 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13109 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13110
13111 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13112 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13113 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13114 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13115 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13116 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13117 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13118 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13119
13120 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13121 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13122 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13123 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13124 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13125 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13126 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13127 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13128
13129 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13130 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13131 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13132 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13133 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13134 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13135 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13136 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13137
13138 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13139 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13140 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13141 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13142 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13143 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13144 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13145 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13146
13147 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13148 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13149 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13150 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13151 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13152 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13153 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13154 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13155
13156 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13157 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13158 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13159 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13160 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13161 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13162 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13163 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13164
13165 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13166 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13167 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13168 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13169 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13170 BB = EmitAtomicBinary(MI, BB, 4, 0);
13171 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13172 BB = EmitAtomicBinary(MI, BB, 8, 0);
13173 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13174 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13175 (Subtarget.hasPartwordAtomics() &&
13176 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13177 (Subtarget.hasPartwordAtomics() &&
13178 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13179 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13180
13181 auto LoadMnemonic = PPC::LDARX;
13182 auto StoreMnemonic = PPC::STDCX;
13183 switch (MI.getOpcode()) {
13184 default:
13185 llvm_unreachable("Compare and swap of unknown size");
13186 case PPC::ATOMIC_CMP_SWAP_I8:
13187 LoadMnemonic = PPC::LBARX;
13188 StoreMnemonic = PPC::STBCX;
13189 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13190 break;
13191 case PPC::ATOMIC_CMP_SWAP_I16:
13192 LoadMnemonic = PPC::LHARX;
13193 StoreMnemonic = PPC::STHCX;
13194 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13195 break;
13196 case PPC::ATOMIC_CMP_SWAP_I32:
13197 LoadMnemonic = PPC::LWARX;
13198 StoreMnemonic = PPC::STWCX;
13199 break;
13200 case PPC::ATOMIC_CMP_SWAP_I64:
13201 LoadMnemonic = PPC::LDARX;
13202 StoreMnemonic = PPC::STDCX;
13203 break;
13204 }
13205 MachineRegisterInfo &RegInfo = F->getRegInfo();
13206 Register dest = MI.getOperand(0).getReg();
13207 Register ptrA = MI.getOperand(1).getReg();
13208 Register ptrB = MI.getOperand(2).getReg();
13209 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13210 Register oldval = MI.getOperand(3).getReg();
13211 Register newval = MI.getOperand(4).getReg();
13212 DebugLoc dl = MI.getDebugLoc();
13213
13214 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13215 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13216 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13217 F->insert(It, loop1MBB);
13218 F->insert(It, loop2MBB);
13219 F->insert(It, exitMBB);
13220 exitMBB->splice(exitMBB->begin(), BB,
13221 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13223
13224 // thisMBB:
13225 // ...
13226 // fallthrough --> loopMBB
13227 BB->addSuccessor(loop1MBB);
13228
13229 // loop1MBB:
13230 // l[bhwd]arx dest, ptr
13231 // cmp[wd] dest, oldval
13232 // bne- exitBB
13233 // loop2MBB:
13234 // st[bhwd]cx. newval, ptr
13235 // bne- loopMBB
13236 // b exitBB
13237 // exitBB:
13238 BB = loop1MBB;
13239 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13240 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13241 .addReg(dest)
13242 .addReg(oldval);
13243 BuildMI(BB, dl, TII->get(PPC::BCC))
13244 .addImm(PPC::PRED_NE)
13245 .addReg(CrReg)
13246 .addMBB(exitMBB);
13247 BB->addSuccessor(loop2MBB);
13248 BB->addSuccessor(exitMBB);
13249
13250 BB = loop2MBB;
13251 BuildMI(BB, dl, TII->get(StoreMnemonic))
13252 .addReg(newval)
13253 .addReg(ptrA)
13254 .addReg(ptrB);
13255 BuildMI(BB, dl, TII->get(PPC::BCC))
13256 .addImm(PPC::PRED_NE)
13257 .addReg(PPC::CR0)
13258 .addMBB(loop1MBB);
13259 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13260 BB->addSuccessor(loop1MBB);
13261 BB->addSuccessor(exitMBB);
13262
13263 // exitMBB:
13264 // ...
13265 BB = exitMBB;
13266 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13267 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13268 // We must use 64-bit registers for addresses when targeting 64-bit,
13269 // since we're actually doing arithmetic on them. Other registers
13270 // can be 32-bit.
13271 bool is64bit = Subtarget.isPPC64();
13272 bool isLittleEndian = Subtarget.isLittleEndian();
13273 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13274
13275 Register dest = MI.getOperand(0).getReg();
13276 Register ptrA = MI.getOperand(1).getReg();
13277 Register ptrB = MI.getOperand(2).getReg();
13278 Register oldval = MI.getOperand(3).getReg();
13279 Register newval = MI.getOperand(4).getReg();
13280 DebugLoc dl = MI.getDebugLoc();
13281
13282 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13283 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13284 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13285 F->insert(It, loop1MBB);
13286 F->insert(It, loop2MBB);
13287 F->insert(It, exitMBB);
13288 exitMBB->splice(exitMBB->begin(), BB,
13289 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13291
13292 MachineRegisterInfo &RegInfo = F->getRegInfo();
13293 const TargetRegisterClass *RC =
13294 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13295 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13296
13297 Register PtrReg = RegInfo.createVirtualRegister(RC);
13298 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13299 Register ShiftReg =
13300 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13301 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13302 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13303 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13304 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13305 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13306 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13307 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13308 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13309 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13310 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13311 Register Ptr1Reg;
13312 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13313 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13314 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13315 // thisMBB:
13316 // ...
13317 // fallthrough --> loopMBB
13318 BB->addSuccessor(loop1MBB);
13319
13320 // The 4-byte load must be aligned, while a char or short may be
13321 // anywhere in the word. Hence all this nasty bookkeeping code.
13322 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13323 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13324 // xori shift, shift1, 24 [16]
13325 // rlwinm ptr, ptr1, 0, 0, 29
13326 // slw newval2, newval, shift
13327 // slw oldval2, oldval,shift
13328 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13329 // slw mask, mask2, shift
13330 // and newval3, newval2, mask
13331 // and oldval3, oldval2, mask
13332 // loop1MBB:
13333 // lwarx tmpDest, ptr
13334 // and tmp, tmpDest, mask
13335 // cmpw tmp, oldval3
13336 // bne- exitBB
13337 // loop2MBB:
13338 // andc tmp2, tmpDest, mask
13339 // or tmp4, tmp2, newval3
13340 // stwcx. tmp4, ptr
13341 // bne- loop1MBB
13342 // b exitBB
13343 // exitBB:
13344 // srw dest, tmpDest, shift
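 // For instance, a byte at offset 1 within its word on a big-endian target
 // gives shift1 = 8 and shift = 8 xor 24 = 16, so newval and the mask are
 // shifted left by 16 to line up with that byte in the word loaded by lwarx.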
13345 if (ptrA != ZeroReg) {
13346 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13347 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13348 .addReg(ptrA)
13349 .addReg(ptrB);
13350 } else {
13351 Ptr1Reg = ptrB;
13352 }
13353
13354 // We need to use a 32-bit subregister here to avoid a register class
13355 // mismatch in 64-bit mode.
13356 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13357 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13358 .addImm(3)
13359 .addImm(27)
13360 .addImm(is8bit ? 28 : 27);
13361 if (!isLittleEndian)
13362 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13363 .addReg(Shift1Reg)
13364 .addImm(is8bit ? 24 : 16);
13365 if (is64bit)
13366 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13367 .addReg(Ptr1Reg)
13368 .addImm(0)
13369 .addImm(61);
13370 else
13371 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13372 .addReg(Ptr1Reg)
13373 .addImm(0)
13374 .addImm(0)
13375 .addImm(29);
13376 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13377 .addReg(newval)
13378 .addReg(ShiftReg);
13379 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13380 .addReg(oldval)
13381 .addReg(ShiftReg);
13382 if (is8bit)
13383 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13384 else {
13385 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13386 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13387 .addReg(Mask3Reg)
13388 .addImm(65535);
13389 }
13390 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13391 .addReg(Mask2Reg)
13392 .addReg(ShiftReg);
13393 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13394 .addReg(NewVal2Reg)
13395 .addReg(MaskReg);
13396 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13397 .addReg(OldVal2Reg)
13398 .addReg(MaskReg);
13399
13400 BB = loop1MBB;
13401 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13402 .addReg(ZeroReg)
13403 .addReg(PtrReg);
13404 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13405 .addReg(TmpDestReg)
13406 .addReg(MaskReg);
13407 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13408 .addReg(TmpReg)
13409 .addReg(OldVal3Reg);
13410 BuildMI(BB, dl, TII->get(PPC::BCC))
13411 .addImm(PPC::PRED_NE)
13412 .addReg(CrReg)
13413 .addMBB(exitMBB);
13414 BB->addSuccessor(loop2MBB);
13415 BB->addSuccessor(exitMBB);
13416
13417 BB = loop2MBB;
13418 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13419 .addReg(TmpDestReg)
13420 .addReg(MaskReg);
13421 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13422 .addReg(Tmp2Reg)
13423 .addReg(NewVal3Reg);
13424 BuildMI(BB, dl, TII->get(PPC::STWCX))
13425 .addReg(Tmp4Reg)
13426 .addReg(ZeroReg)
13427 .addReg(PtrReg);
13428 BuildMI(BB, dl, TII->get(PPC::BCC))
13429 .addImm(PPC::PRED_NE)
13430 .addReg(PPC::CR0)
13431 .addMBB(loop1MBB);
13432 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13433 BB->addSuccessor(loop1MBB);
13434 BB->addSuccessor(exitMBB);
13435
13436 // exitMBB:
13437 // ...
13438 BB = exitMBB;
13439 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13440 .addReg(TmpReg)
13441 .addReg(ShiftReg);
13442 } else if (MI.getOpcode() == PPC::FADDrtz) {
13443 // This pseudo performs an FADD with rounding mode temporarily forced
13444 // to round-to-zero. We emit this via custom inserter since the FPSCR
13445 // is not modeled at the SelectionDAG level.
13446 Register Dest = MI.getOperand(0).getReg();
13447 Register Src1 = MI.getOperand(1).getReg();
13448 Register Src2 = MI.getOperand(2).getReg();
13449 DebugLoc dl = MI.getDebugLoc();
13450
13451 MachineRegisterInfo &RegInfo = F->getRegInfo();
13452 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13453
13454 // Save FPSCR value.
13455 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13456
13457 // Set rounding mode to round-to-zero.
13458 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13459 .addImm(31)
13461
13462 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13463 .addImm(30)
13465
13466 // Perform addition.
13467 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13468 .addReg(Src1)
13469 .addReg(Src2);
13470 if (MI.getFlag(MachineInstr::NoFPExcept))
13471 MIB.setFlag(MachineInstr::NoFPExcept);
13472
13473 // Restore FPSCR value.
13474 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13475 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13476 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13477 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13478 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13479 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13480 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13481 ? PPC::ANDI8_rec
13482 : PPC::ANDI_rec;
13483 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13484 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13485
13486 MachineRegisterInfo &RegInfo = F->getRegInfo();
13487 Register Dest = RegInfo.createVirtualRegister(
13488 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13489
13490 DebugLoc Dl = MI.getDebugLoc();
13491 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13492 .addReg(MI.getOperand(1).getReg())
13493 .addImm(1);
13494 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13495 MI.getOperand(0).getReg())
13496 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13497 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13498 DebugLoc Dl = MI.getDebugLoc();
13499 MachineRegisterInfo &RegInfo = F->getRegInfo();
13500 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13501 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13502 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13503 MI.getOperand(0).getReg())
13504 .addReg(CRReg);
13505 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13506 DebugLoc Dl = MI.getDebugLoc();
13507 unsigned Imm = MI.getOperand(1).getImm();
13508 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13509 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13510 MI.getOperand(0).getReg())
13511 .addReg(PPC::CR0EQ);
13512 } else if (MI.getOpcode() == PPC::SETRNDi) {
13513 DebugLoc dl = MI.getDebugLoc();
13514 Register OldFPSCRReg = MI.getOperand(0).getReg();
13515
13516 // Save FPSCR value.
13517 if (MRI.use_empty(OldFPSCRReg))
13518 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13519 else
13520 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13521
13522 // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
13523 // the following settings:
13524 // 00 Round to nearest
13525 // 01 Round to 0
13526 // 10 Round to +inf
13527 // 11 Round to -inf
13528
13529 // When the operand is an immediate, use its two least significant bits to
13530 // set bits 62:63 of the FPSCR.
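 // For example, an immediate of 3 emits mtfsb1 31 and mtfsb1 30, setting the
 // mode to 0b11 (round to -inf per the table above).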
13531 unsigned Mode = MI.getOperand(1).getImm();
13532 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13533 .addImm(31)
13535
13536 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13537 .addImm(30)
13539 } else if (MI.getOpcode() == PPC::SETRND) {
13540 DebugLoc dl = MI.getDebugLoc();
13541
13542 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13543 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13544 // If the target doesn't have DirectMove, we should use the stack to do the
13545 // conversion, because the target doesn't have instructions like mtvsrd
13546 // or mfvsrd to do this conversion directly.
13547 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13548 if (Subtarget.hasDirectMove()) {
13549 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13550 .addReg(SrcReg);
13551 } else {
13552 // Use stack to do the register copy.
13553 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13554 MachineRegisterInfo &RegInfo = F->getRegInfo();
13555 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13556 if (RC == &PPC::F8RCRegClass) {
13557 // Copy register from F8RCRegClass to G8RCRegClass.
13558 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13559 "Unsupported RegClass.");
13560
13561 StoreOp = PPC::STFD;
13562 LoadOp = PPC::LD;
13563 } else {
13564 // Copy register from G8RCRegClass to F8RCRegClass.
13565 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13566 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13567 "Unsupported RegClass.");
13568 }
13569
13570 MachineFrameInfo &MFI = F->getFrameInfo();
13571 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13572
13573 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13574 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13576 MFI.getObjectAlign(FrameIdx));
13577
13578 // Store the SrcReg into the stack.
13579 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13580 .addReg(SrcReg)
13581 .addImm(0)
13582 .addFrameIndex(FrameIdx)
13583 .addMemOperand(MMOStore);
13584
13585 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13586 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13588 MFI.getObjectAlign(FrameIdx));
13589
13590 // Load from the stack where SrcReg is stored, and save to DestReg,
13591 // so we have done the RegClass conversion from RegClass::SrcReg to
13592 // RegClass::DestReg.
13593 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13594 .addImm(0)
13595 .addFrameIndex(FrameIdx)
13596 .addMemOperand(MMOLoad);
13597 }
13598 };
13599
13600 Register OldFPSCRReg = MI.getOperand(0).getReg();
13601
13602 // Save FPSCR value.
13603 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13604
13605 // When the operand is a gprc register, use its two least significant bits
13606 // and the mtfsf instruction to set bits 62:63 of the FPSCR.
13607 //
13608 // copy OldFPSCRTmpReg, OldFPSCRReg
13609 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13610 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13611 // copy NewFPSCRReg, NewFPSCRTmpReg
13612 // mtfsf 255, NewFPSCRReg
13613 MachineOperand SrcOp = MI.getOperand(1);
13614 MachineRegisterInfo &RegInfo = F->getRegInfo();
13615 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13616
13617 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13618
13619 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13620 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13621
13622 // The first operand of INSERT_SUBREG should be a register that has
13623 // subregisters; we only care about its register class, so we use an
13624 // IMPLICIT_DEF register.
13625 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13626 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13627 .addReg(ImDefReg)
13628 .add(SrcOp)
13629 .addImm(1);
13630
13631 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13632 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13633 .addReg(OldFPSCRTmpReg)
13634 .addReg(ExtSrcReg)
13635 .addImm(0)
13636 .addImm(62);
13637
13638 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13639 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13640
13641 // The mask 255 means that bits 32:63 of NewFPSCRReg are placed into bits
13642 // 32:63 of the FPSCR.
13643 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13644 .addImm(255)
13645 .addReg(NewFPSCRReg)
13646 .addImm(0)
13647 .addImm(0);
13648 } else if (MI.getOpcode() == PPC::SETFLM) {
13649 DebugLoc Dl = MI.getDebugLoc();
13650
13651 // Result of setflm is previous FPSCR content, so we need to save it first.
13652 Register OldFPSCRReg = MI.getOperand(0).getReg();
13653 if (MRI.use_empty(OldFPSCRReg))
13654 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13655 else
13656 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13657
13658 // Put bits in 32:63 to FPSCR.
13659 Register NewFPSCRReg = MI.getOperand(1).getReg();
13660 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13661 .addImm(255)
13662 .addReg(NewFPSCRReg)
13663 .addImm(0)
13664 .addImm(0);
13665 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13666 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13667 return emitProbedAlloca(MI, BB);
13668 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13669 DebugLoc DL = MI.getDebugLoc();
13670 Register Src = MI.getOperand(2).getReg();
13671 Register Lo = MI.getOperand(0).getReg();
13672 Register Hi = MI.getOperand(1).getReg();
13673 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13674 .addDef(Lo)
13675 .addUse(Src, 0, PPC::sub_gp8_x1);
13676 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13677 .addDef(Hi)
13678 .addUse(Src, 0, PPC::sub_gp8_x0);
13679 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13680 MI.getOpcode() == PPC::STQX_PSEUDO) {
13681 DebugLoc DL = MI.getDebugLoc();
13682 // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and
13683 // holds the result of adding RA and RB, so it has to be
13684 // g8rc_and_g8rc_nox0.
13685 Register Ptr =
13686 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13687 Register Val = MI.getOperand(0).getReg();
13688 Register RA = MI.getOperand(1).getReg();
13689 Register RB = MI.getOperand(2).getReg();
13690 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13691 BuildMI(*BB, MI, DL,
13692 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13693 : TII->get(PPC::STQ))
13694 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13695 .addImm(0)
13696 .addReg(Ptr);
13697 } else {
13698 llvm_unreachable("Unexpected instr type to insert");
13699 }
13700
13701 MI.eraseFromParent(); // The pseudo instruction is gone now.
13702 return BB;
13703}
13704
13705//===----------------------------------------------------------------------===//
13706// Target Optimization Hooks
13707//===----------------------------------------------------------------------===//
13708
13709static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13710 // For the estimates, convergence is quadratic, so we essentially double the
13711 // number of digits correct after every iteration. For both FRE and FRSQRTE,
13712 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13713 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
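 // Illustratively, starting from 2^-5 accuracy, three iterations give roughly
 // 2^-40 (enough for float's 24-bit significand) and the extra f64 step roughly
 // 2^-80 (enough for 53 bits); starting from 2^-14, one iteration is enough for
 // float and two for double.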
13714 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13715 if (VT.getScalarType() == MVT::f64)
13716 RefinementSteps++;
13717 return RefinementSteps;
13718}
13719
13720SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13721 const DenormalMode &Mode) const {
13722 // We only have VSX Vector Test for software Square Root.
13723 EVT VT = Op.getValueType();
13724 if (!isTypeLegal(MVT::i1) ||
13725 (VT != MVT::f64 &&
13726 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13727 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13728
13729 SDLoc DL(Op);
13730 // The output register of FTSQRT is CR field.
13731 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13732 // ftsqrt BF,FRB
13733 // Let e_b be the unbiased exponent of the double-precision
13734 // floating-point operand in register FRB.
13735 // fe_flag is set to 1 if either of the following conditions occurs.
13736 // - The double-precision floating-point operand in register FRB is a zero,
13737 // a NaN, or an infinity, or a negative value.
13738 // - e_b is less than or equal to -970.
13739 // Otherwise fe_flag is set to 0.
13740 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13741 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13742 // exponent is less than -970)
13743 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13744 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13745 FTSQRT, SRIdxVal),
13746 0);
13747}
13748
13749SDValue
13750PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13751 SelectionDAG &DAG) const {
13752 // We only have VSX Vector Square Root.
13753 EVT VT = Op.getValueType();
13754 if (VT != MVT::f64 &&
13755 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13756 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
13757
13758 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13759}
13760
13761SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13762 int Enabled, int &RefinementSteps,
13763 bool &UseOneConstNR,
13764 bool Reciprocal) const {
13765 EVT VT = Operand.getValueType();
13766 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13767 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13768 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13769 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13770 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13771 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13772
13773 // The Newton-Raphson computation with a single constant does not provide
13774 // enough accuracy on some CPUs.
13775 UseOneConstNR = !Subtarget.needsTwoConstNR();
13776 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13777 }
13778 return SDValue();
13779}
13780
13781SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13782 int Enabled,
13783 int &RefinementSteps) const {
13784 EVT VT = Operand.getValueType();
13785 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13786 (VT == MVT::f64 && Subtarget.hasFRE()) ||
13787 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13788 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13789 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13790 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13791 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13792 }
13793 return SDValue();
13794}
13795
13796unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13797 // Note: This functionality is used only when unsafe-fp-math is enabled, and
13798 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13799 // enabled for division), this functionality is redundant with the default
13800 // combiner logic (once the division -> reciprocal/multiply transformation
13801 // has taken place). As a result, this matters more for older cores than for
13802 // newer ones.
13803
13804 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13805 // reciprocal if there are two or more FDIVs (for embedded cores with only
13806 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
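 // For example, x/d + y/d + z/d can become r = 1.0/d; x*r + y*r + z*r once
 // the threshold returned here is met.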
13807 switch (Subtarget.getCPUDirective()) {
13808 default:
13809 return 3;
13810 case PPC::DIR_440:
13811 case PPC::DIR_A2:
13812 case PPC::DIR_E500:
13813 case PPC::DIR_E500mc:
13814 case PPC::DIR_E5500:
13815 return 2;
13816 }
13817}
13818
13819// isConsecutiveLSLoc needs to work even if all adds have not yet been
13820// collapsed, and so we need to look through chains of them.
13821static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13822 int64_t& Offset, SelectionDAG &DAG) {
13823 if (DAG.isBaseWithConstantOffset(Loc)) {
13824 Base = Loc.getOperand(0);
13825 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13826
13827 // The base might itself be a base plus an offset, and if so, accumulate
13828 // that as well.
13829 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
13830 }
13831}
13832
13833static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13834 unsigned Bytes, int Dist,
13835 SelectionDAG &DAG) {
13836 if (VT.getSizeInBits() / 8 != Bytes)
13837 return false;
13838
13839 SDValue BaseLoc = Base->getBasePtr();
13840 if (Loc.getOpcode() == ISD::FrameIndex) {
13841 if (BaseLoc.getOpcode() != ISD::FrameIndex)
13842 return false;
13843 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13844 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13845 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13846 int FS = MFI.getObjectSize(FI);
13847 int BFS = MFI.getObjectSize(BFI);
13848 if (FS != BFS || FS != (int)Bytes) return false;
13849 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13850 }
13851
13852 SDValue Base1 = Loc, Base2 = BaseLoc;
13853 int64_t Offset1 = 0, Offset2 = 0;
13854 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13855 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13856 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13857 return true;
13858
13859 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13860 const GlobalValue *GV1 = nullptr;
13861 const GlobalValue *GV2 = nullptr;
13862 Offset1 = 0;
13863 Offset2 = 0;
13864 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13865 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13866 if (isGA1 && isGA2 && GV1 == GV2)
13867 return Offset1 == (Offset2 + Dist*Bytes);
13868 return false;
13869}
13870
13871// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13872// not enforce equality of the chain operands.
13873static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13874 unsigned Bytes, int Dist,
13875 SelectionDAG &DAG) {
13876 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13877 EVT VT = LS->getMemoryVT();
13878 SDValue Loc = LS->getBasePtr();
13879 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13880 }
13881
13882 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13883 EVT VT;
13884 switch (N->getConstantOperandVal(1)) {
13885 default: return false;
13886 case Intrinsic::ppc_altivec_lvx:
13887 case Intrinsic::ppc_altivec_lvxl:
13888 case Intrinsic::ppc_vsx_lxvw4x:
13889 case Intrinsic::ppc_vsx_lxvw4x_be:
13890 VT = MVT::v4i32;
13891 break;
13892 case Intrinsic::ppc_vsx_lxvd2x:
13893 case Intrinsic::ppc_vsx_lxvd2x_be:
13894 VT = MVT::v2f64;
13895 break;
13896 case Intrinsic::ppc_altivec_lvebx:
13897 VT = MVT::i8;
13898 break;
13899 case Intrinsic::ppc_altivec_lvehx:
13900 VT = MVT::i16;
13901 break;
13902 case Intrinsic::ppc_altivec_lvewx:
13903 VT = MVT::i32;
13904 break;
13905 }
13906
13907 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13908 }
13909
13910 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13911 EVT VT;
13912 switch (N->getConstantOperandVal(1)) {
13913 default: return false;
13914 case Intrinsic::ppc_altivec_stvx:
13915 case Intrinsic::ppc_altivec_stvxl:
13916 case Intrinsic::ppc_vsx_stxvw4x:
13917 VT = MVT::v4i32;
13918 break;
13919 case Intrinsic::ppc_vsx_stxvd2x:
13920 VT = MVT::v2f64;
13921 break;
13922 case Intrinsic::ppc_vsx_stxvw4x_be:
13923 VT = MVT::v4i32;
13924 break;
13925 case Intrinsic::ppc_vsx_stxvd2x_be:
13926 VT = MVT::v2f64;
13927 break;
13928 case Intrinsic::ppc_altivec_stvebx:
13929 VT = MVT::i8;
13930 break;
13931 case Intrinsic::ppc_altivec_stvehx:
13932 VT = MVT::i16;
13933 break;
13934 case Intrinsic::ppc_altivec_stvewx:
13935 VT = MVT::i32;
13936 break;
13937 }
13938
13939 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13940 }
13941
13942 return false;
13943}
13944
13945 // Return true if there is a nearby consecutive load to the one provided
13946 // (regardless of alignment). We search up and down the chain, looking through
13947// token factors and other loads (but nothing else). As a result, a true result
13948// indicates that it is safe to create a new consecutive load adjacent to the
13949// load provided.
13951 SDValue Chain = LD->getChain();
13952 EVT VT = LD->getMemoryVT();
13953
13954 SmallSet<SDNode *, 16> LoadRoots;
13955 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13956 SmallSet<SDNode *, 16> Visited;
13957
13958 // First, search up the chain, branching to follow all token-factor operands.
13959 // If we find a consecutive load, then we're done, otherwise, record all
13960 // nodes just above the top-level loads and token factors.
13961 while (!Queue.empty()) {
13962 SDNode *ChainNext = Queue.pop_back_val();
13963 if (!Visited.insert(ChainNext).second)
13964 continue;
13965
13966 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13967 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13968 return true;
13969
13970 if (!Visited.count(ChainLD->getChain().getNode()))
13971 Queue.push_back(ChainLD->getChain().getNode());
13972 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13973 for (const SDUse &O : ChainNext->ops())
13974 if (!Visited.count(O.getNode()))
13975 Queue.push_back(O.getNode());
13976 } else
13977 LoadRoots.insert(ChainNext);
13978 }
13979
13980 // Second, search down the chain, starting from the top-level nodes recorded
13981 // in the first phase. These top-level nodes are the nodes just above all
13982 // loads and token factors. Starting with their uses, recursively look through
13983 // all loads (just the chain uses) and token factors to find a consecutive
13984 // load.
13985 Visited.clear();
13986 Queue.clear();
13987
13988 for (SDNode *I : LoadRoots) {
13989 Queue.push_back(I);
13990
13991 while (!Queue.empty()) {
13992 SDNode *LoadRoot = Queue.pop_back_val();
13993 if (!Visited.insert(LoadRoot).second)
13994 continue;
13995
13996 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13997 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13998 return true;
13999
14000 for (SDNode *U : LoadRoot->uses())
14001 if (((isa<MemSDNode>(U) &&
14002 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14003 U->getOpcode() == ISD::TokenFactor) &&
14004 !Visited.count(U))
14005 Queue.push_back(U);
14006 }
14007 }
14008
14009 return false;
14010}
14011
14012/// This function is called when we have proved that a SETCC node can be replaced
14013/// by subtraction (and other supporting instructions) so that the result of the
14014/// comparison is kept in a GPR instead of a CR. This function is purely for
14015/// codegen purposes and has some flags to guide the codegen process.
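/// For example, an unsigned i32 'a < b' on a 64-bit-legal target becomes
/// (zext a - zext b) >> 63: the shifted-down sign bit is 1 exactly when the
/// subtraction borrows, i.e. when a < b.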
14016static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14017 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14018 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14019
14020 // Zero extend the operands to the largest legal integer. Originally, they
14021 // must be of a strictly smaller size.
14022 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14023 DAG.getConstant(Size, DL, MVT::i32));
14024 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14025 DAG.getConstant(Size, DL, MVT::i32));
14026
14027 // Swap if needed. Depends on the condition code.
14028 if (Swap)
14029 std::swap(Op0, Op1);
14030
14031 // Subtract extended integers.
14032 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14033
14034 // Move the sign bit to the least significant position and zero out the rest.
14035 // Now the least significant bit carries the result of original comparison.
14036 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14037 DAG.getConstant(Size - 1, DL, MVT::i32));
14038 auto Final = Shifted;
14039
14040 // Complement the result if needed. Based on the condition code.
14041 if (Complement)
14042 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14043 DAG.getConstant(1, DL, MVT::i64));
14044
14045 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14046}
14047
14048SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14049 DAGCombinerInfo &DCI) const {
14050 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14051
14052 SelectionDAG &DAG = DCI.DAG;
14053 SDLoc DL(N);
14054
14055 // Size of integers being compared has a critical role in the following
14056 // analysis, so we prefer to do this when all types are legal.
14057 if (!DCI.isAfterLegalizeDAG())
14058 return SDValue();
14059
14060 // If all users of SETCC extend its value to a legal integer type
14061 // then we replace SETCC with a subtraction
14062 for (const SDNode *U : N->uses())
14063 if (U->getOpcode() != ISD::ZERO_EXTEND)
14064 return SDValue();
14065
14066 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14067 auto OpSize = N->getOperand(0).getValueSizeInBits();
14068
14070
14071 if (OpSize < Size) {
14072 switch (CC) {
14073 default: break;
14074 case ISD::SETULT:
14075 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14076 case ISD::SETULE:
14077 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14078 case ISD::SETUGT:
14079 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14080 case ISD::SETUGE:
14081 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14082 }
14083 }
14084
14085 return SDValue();
14086}
14087
14088SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14089 DAGCombinerInfo &DCI) const {
14090 SelectionDAG &DAG = DCI.DAG;
14091 SDLoc dl(N);
14092
14093 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14094 // If we're tracking CR bits, we need to be careful that we don't have:
14095 // trunc(binary-ops(zext(x), zext(y)))
14096 // or
14097 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14098 // such that we're unnecessarily moving things into GPRs when it would be
14099 // better to keep them in CR bits.
14100
14101 // Note that trunc here can be an actual i1 trunc, or can be the effective
14102 // truncation that comes from a setcc or select_cc.
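  // Illustrative sketch (editorial, hypothetical IR): a pattern such as
  //   %c1 = icmp eq i32 %x, 0
  //   %c2 = icmp eq i32 %y, 0
  //   %z1 = zext i1 %c1 to i32
  //   %z2 = zext i1 %c2 to i32
  //   %a  = and i32 %z1, %z2
  //   %r  = trunc i32 %a to i1
  // should instead AND the two i1 (CR bit) values directly; the promotion
  // below rewrites the intermediate operations to i1 to achieve that.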
14103 if (N->getOpcode() == ISD::TRUNCATE &&
14104 N->getValueType(0) != MVT::i1)
14105 return SDValue();
14106
14107 if (N->getOperand(0).getValueType() != MVT::i32 &&
14108 N->getOperand(0).getValueType() != MVT::i64)
14109 return SDValue();
14110
14111 if (N->getOpcode() == ISD::SETCC ||
14112 N->getOpcode() == ISD::SELECT_CC) {
14113 // If we're looking at a comparison, then we need to make sure that the
14114 // high bits (all except for the first) don't matter to the result.
14115 ISD::CondCode CC =
14116 cast<CondCodeSDNode>(N->getOperand(
14117 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14118 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14119
14120 if (ISD::isSignedIntSetCC(CC)) {
14121 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14122 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14123 return SDValue();
14124 } else if (ISD::isUnsignedIntSetCC(CC)) {
14125 if (!DAG.MaskedValueIsZero(N->getOperand(0),
14126 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14127 !DAG.MaskedValueIsZero(N->getOperand(1),
14128 APInt::getHighBitsSet(OpBits, OpBits-1)))
14129 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14130 : SDValue());
14131 } else {
14132 // This is neither a signed nor an unsigned comparison, just make sure
14133 // that the high bits are equal.
14134 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14135 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14136
14137 // We don't really care about what is known about the first bit (if
14138 // anything), so pretend that it is known zero for both to ensure they can
14139 // be compared as constants.
14140 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14141 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14142
14143 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14144 Op1Known.getConstant() != Op2Known.getConstant())
14145 return SDValue();
14146 }
14147 }
14148
14149 // We now know that the higher-order bits are irrelevant, we just need to
14150 // make sure that all of the intermediate operations are bit operations, and
14151 // all inputs are extensions.
14152 if (N->getOperand(0).getOpcode() != ISD::AND &&
14153 N->getOperand(0).getOpcode() != ISD::OR &&
14154 N->getOperand(0).getOpcode() != ISD::XOR &&
14155 N->getOperand(0).getOpcode() != ISD::SELECT &&
14156 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14157 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14158 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14159 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14160 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14161 return SDValue();
14162
14163 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14164 N->getOperand(1).getOpcode() != ISD::AND &&
14165 N->getOperand(1).getOpcode() != ISD::OR &&
14166 N->getOperand(1).getOpcode() != ISD::XOR &&
14167 N->getOperand(1).getOpcode() != ISD::SELECT &&
14168 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14169 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14170 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14171 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14172 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14173 return SDValue();
14174
14175 SmallVector<SDValue, 4> Inputs;
14176 SmallVector<SDValue, 8> BinOps, PromOps;
14177 SmallPtrSet<SDNode *, 16> Visited;
14178
14179 for (unsigned i = 0; i < 2; ++i) {
14180 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14181 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14182 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14183 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14184 isa<ConstantSDNode>(N->getOperand(i)))
14185 Inputs.push_back(N->getOperand(i));
14186 else
14187 BinOps.push_back(N->getOperand(i));
14188
14189 if (N->getOpcode() == ISD::TRUNCATE)
14190 break;
14191 }
14192
14193 // Visit all inputs, collect all binary operations (and, or, xor and
14194 // select) that are all fed by extensions.
14195 while (!BinOps.empty()) {
14196 SDValue BinOp = BinOps.pop_back_val();
14197
14198 if (!Visited.insert(BinOp.getNode()).second)
14199 continue;
14200
14201 PromOps.push_back(BinOp);
14202
14203 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14204 // The condition of the select is not promoted.
14205 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14206 continue;
14207 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14208 continue;
14209
14210 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14211 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14212 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14213 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14214 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14215 Inputs.push_back(BinOp.getOperand(i));
14216 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14217 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14218 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14219 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14220 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14221 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14222 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14223 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14224 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14225 BinOps.push_back(BinOp.getOperand(i));
14226 } else {
14227 // We have an input that is not an extension or another binary
14228 // operation; we'll abort this transformation.
14229 return SDValue();
14230 }
14231 }
14232 }
14233
14234 // Make sure that this is a self-contained cluster of operations (which
14235 // is not quite the same thing as saying that everything has only one
14236 // use).
14237 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14238 if (isa<ConstantSDNode>(Inputs[i]))
14239 continue;
14240
14241 for (const SDNode *User : Inputs[i].getNode()->uses()) {
14242 if (User != N && !Visited.count(User))
14243 return SDValue();
14244
14245 // Make sure that we're not going to promote the non-output-value
14246 // operand(s) of SELECT or SELECT_CC.
14247 // FIXME: Although we could sometimes handle this, and it does occur in
14248 // practice that one of the condition inputs to the select is also one of
14249 // the outputs, we currently can't deal with this.
14250 if (User->getOpcode() == ISD::SELECT) {
14251 if (User->getOperand(0) == Inputs[i])
14252 return SDValue();
14253 } else if (User->getOpcode() == ISD::SELECT_CC) {
14254 if (User->getOperand(0) == Inputs[i] ||
14255 User->getOperand(1) == Inputs[i])
14256 return SDValue();
14257 }
14258 }
14259 }
14260
14261 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14262 for (const SDNode *User : PromOps[i].getNode()->uses()) {
14263 if (User != N && !Visited.count(User))
14264 return SDValue();
14265
14266 // Make sure that we're not going to promote the non-output-value
14267 // operand(s) of SELECT or SELECT_CC.
14268 // FIXME: Although we could sometimes handle this, and it does occur in
14269 // practice that one of the condition inputs to the select is also one of
14270 // the outputs, we currently can't deal with this.
14271 if (User->getOpcode() == ISD::SELECT) {
14272 if (User->getOperand(0) == PromOps[i])
14273 return SDValue();
14274 } else if (User->getOpcode() == ISD::SELECT_CC) {
14275 if (User->getOperand(0) == PromOps[i] ||
14276 User->getOperand(1) == PromOps[i])
14277 return SDValue();
14278 }
14279 }
14280 }
14281
14282 // Replace all inputs with the extension operand.
14283 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14284 // Constants may have users outside the cluster of to-be-promoted nodes,
14285 // and so we need to replace those as we do the promotions.
14286 if (isa<ConstantSDNode>(Inputs[i]))
14287 continue;
14288 else
14289 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14290 }
14291
14292 std::list<HandleSDNode> PromOpHandles;
14293 for (auto &PromOp : PromOps)
14294 PromOpHandles.emplace_back(PromOp);
14295
14296 // Replace all operations (these are all the same, but have a different
14297 // (i1) return type). DAG.getNode will validate that the types of
14298 // a binary operator match, so go through the list in reverse so that
14299 // we've likely promoted both operands first. Any intermediate truncations or
14300 // extensions disappear.
14301 while (!PromOpHandles.empty()) {
14302 SDValue PromOp = PromOpHandles.back().getValue();
14303 PromOpHandles.pop_back();
14304
14305 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14306 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14307 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14308 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14309 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14310 PromOp.getOperand(0).getValueType() != MVT::i1) {
14311 // The operand is not yet ready (see comment below).
14312 PromOpHandles.emplace_front(PromOp);
14313 continue;
14314 }
14315
14316 SDValue RepValue = PromOp.getOperand(0);
14317 if (isa<ConstantSDNode>(RepValue))
14318 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14319
14320 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14321 continue;
14322 }
14323
14324 unsigned C;
14325 switch (PromOp.getOpcode()) {
14326 default: C = 0; break;
14327 case ISD::SELECT: C = 1; break;
14328 case ISD::SELECT_CC: C = 2; break;
14329 }
14330
14331 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14332 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14333 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14334 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14335 // The to-be-promoted operands of this node have not yet been
14336 // promoted (this should be rare because we're going through the
14337 // list backward, but if one of the operands has several users in
14338 // this cluster of to-be-promoted nodes, it is possible).
14339 PromOpHandles.emplace_front(PromOp);
14340 continue;
14341 }
14342
14343 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14344 PromOp.getNode()->op_end());
14345
14346 // If there are any constant inputs, make sure they're replaced now.
14347 for (unsigned i = 0; i < 2; ++i)
14348 if (isa<ConstantSDNode>(Ops[C+i]))
14349 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14350
14351 DAG.ReplaceAllUsesOfValueWith(PromOp,
14352 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14353 }
14354
14355 // Now we're left with the initial truncation itself.
14356 if (N->getOpcode() == ISD::TRUNCATE)
14357 return N->getOperand(0);
14358
14359 // Otherwise, this is a comparison. The operands to be compared have just
14360 // changed type (to i1), but everything else is the same.
14361 return SDValue(N, 0);
14362}
14363
14364SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14365 DAGCombinerInfo &DCI) const {
14366 SelectionDAG &DAG = DCI.DAG;
14367 SDLoc dl(N);
14368
14369 // If we're tracking CR bits, we need to be careful that we don't have:
14370 // zext(binary-ops(trunc(x), trunc(y)))
14371 // or
14372 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
14373 // such that we're unnecessarily moving things into CR bits that can more
14374 // efficiently stay in GPRs. Note that if we're not certain that the high
14375 // bits are set as required by the final extension, we still may need to do
14376 // some masking to get the proper behavior.
14377
14378 // This same functionality is important on PPC64 when dealing with
14379 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14380 // the return values of functions. Because it is so similar, it is handled
14381 // here as well.
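  // Illustrative sketch (editorial, hypothetical IR): on PPC64, a pattern like
  //   %t1 = trunc i64 %x to i32
  //   %t2 = trunc i64 %y to i32
  //   %a  = xor i32 %t1, %t2
  //   %r  = zext i32 %a to i64
  // can be rewritten as a single 64-bit xor of %x and %y, followed by a
  // masking AND (or a shift pair, for sign extension) only when the high bits
  // are not already known to have the required value.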
14382
14383 if (N->getValueType(0) != MVT::i32 &&
14384 N->getValueType(0) != MVT::i64)
14385 return SDValue();
14386
14387 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14388 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14389 return SDValue();
14390
14391 if (N->getOperand(0).getOpcode() != ISD::AND &&
14392 N->getOperand(0).getOpcode() != ISD::OR &&
14393 N->getOperand(0).getOpcode() != ISD::XOR &&
14394 N->getOperand(0).getOpcode() != ISD::SELECT &&
14395 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14396 return SDValue();
14398 SmallVector<SDValue, 4> Inputs;
14399 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14400 SmallPtrSet<SDNode *, 16> Visited;
14401
14402 // Visit all inputs, collect all binary operations (and, or, xor and
14403 // select) that are all fed by truncations.
14404 while (!BinOps.empty()) {
14405 SDValue BinOp = BinOps.pop_back_val();
14406
14407 if (!Visited.insert(BinOp.getNode()).second)
14408 continue;
14409
14410 PromOps.push_back(BinOp);
14411
14412 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14413 // The condition of the select is not promoted.
14414 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14415 continue;
14416 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14417 continue;
14418
14419 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14420 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14421 Inputs.push_back(BinOp.getOperand(i));
14422 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14423 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14424 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14425 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14426 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14427 BinOps.push_back(BinOp.getOperand(i));
14428 } else {
14429 // We have an input that is not a truncation or another binary
14430 // operation; we'll abort this transformation.
14431 return SDValue();
14432 }
14433 }
14434 }
14435
14436 // The operands of a select that must be truncated when the select is
14437 // promoted because the operand is actually part of the to-be-promoted set.
14438 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14439
14440 // Make sure that this is a self-contained cluster of operations (which
14441 // is not quite the same thing as saying that everything has only one
14442 // use).
14443 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14444 if (isa<ConstantSDNode>(Inputs[i]))
14445 continue;
14446
14447 for (SDNode *User : Inputs[i].getNode()->uses()) {
14448 if (User != N && !Visited.count(User))
14449 return SDValue();
14450
14451 // If we're going to promote the non-output-value operand(s) of SELECT or
14452 // SELECT_CC, record them for truncation.
14453 if (User->getOpcode() == ISD::SELECT) {
14454 if (User->getOperand(0) == Inputs[i])
14455 SelectTruncOp[0].insert(std::make_pair(User,
14456 User->getOperand(0).getValueType()));
14457 } else if (User->getOpcode() == ISD::SELECT_CC) {
14458 if (User->getOperand(0) == Inputs[i])
14459 SelectTruncOp[0].insert(std::make_pair(User,
14460 User->getOperand(0).getValueType()));
14461 if (User->getOperand(1) == Inputs[i])
14462 SelectTruncOp[1].insert(std::make_pair(User,
14463 User->getOperand(1).getValueType()));
14464 }
14465 }
14466 }
14467
14468 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14469 for (SDNode *User : PromOps[i].getNode()->uses()) {
14470 if (User != N && !Visited.count(User))
14471 return SDValue();
14472
14473 // If we're going to promote the non-output-value operand(s) of SELECT or
14474 // SELECT_CC, record them for truncation.
14475 if (User->getOpcode() == ISD::SELECT) {
14476 if (User->getOperand(0) == PromOps[i])
14477 SelectTruncOp[0].insert(std::make_pair(User,
14478 User->getOperand(0).getValueType()));
14479 } else if (User->getOpcode() == ISD::SELECT_CC) {
14480 if (User->getOperand(0) == PromOps[i])
14481 SelectTruncOp[0].insert(std::make_pair(User,
14482 User->getOperand(0).getValueType()));
14483 if (User->getOperand(1) == PromOps[i])
14484 SelectTruncOp[1].insert(std::make_pair(User,
14485 User->getOperand(1).getValueType()));
14486 }
14487 }
14488 }
14489
14490 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14491 bool ReallyNeedsExt = false;
14492 if (N->getOpcode() != ISD::ANY_EXTEND) {
14493 // If all of the inputs are not already sign/zero extended, then
14494 // we'll still need to do that at the end.
14495 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14496 if (isa<ConstantSDNode>(Inputs[i]))
14497 continue;
14498
14499 unsigned OpBits =
14500 Inputs[i].getOperand(0).getValueSizeInBits();
14501 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14502
14503 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14504 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14505 APInt::getHighBitsSet(OpBits,
14506 OpBits-PromBits))) ||
14507 (N->getOpcode() == ISD::SIGN_EXTEND &&
14508 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14509 (OpBits-(PromBits-1)))) {
14510 ReallyNeedsExt = true;
14511 break;
14512 }
14513 }
14514 }
14515
14516 // Replace all inputs, either with the truncation operand, or a
14517 // truncation or extension to the final output type.
14518 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14519 // Constant inputs need to be replaced with the to-be-promoted nodes that
14520 // use them because they might have users outside of the cluster of
14521 // promoted nodes.
14522 if (isa<ConstantSDNode>(Inputs[i]))
14523 continue;
14524
14525 SDValue InSrc = Inputs[i].getOperand(0);
14526 if (Inputs[i].getValueType() == N->getValueType(0))
14527 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14528 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14529 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14530 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14531 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14532 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14533 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14534 else
14535 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14536 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14537 }
14538
14539 std::list<HandleSDNode> PromOpHandles;
14540 for (auto &PromOp : PromOps)
14541 PromOpHandles.emplace_back(PromOp);
14542
14543 // Replace all operations (these are all the same, but have a different
14544 // (promoted) return type). DAG.getNode will validate that the types of
14545 // a binary operator match, so go through the list in reverse so that
14546 // we've likely promoted both operands first.
14547 while (!PromOpHandles.empty()) {
14548 SDValue PromOp = PromOpHandles.back().getValue();
14549 PromOpHandles.pop_back();
14550
14551 unsigned C;
14552 switch (PromOp.getOpcode()) {
14553 default: C = 0; break;
14554 case ISD::SELECT: C = 1; break;
14555 case ISD::SELECT_CC: C = 2; break;
14556 }
14557
14558 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14559 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14560 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14561 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14562 // The to-be-promoted operands of this node have not yet been
14563 // promoted (this should be rare because we're going through the
14564 // list backward, but if one of the operands has several users in
14565 // this cluster of to-be-promoted nodes, it is possible).
14566 PromOpHandles.emplace_front(PromOp);
14567 continue;
14568 }
14569
14570 // For SELECT and SELECT_CC nodes, we do a similar check for any
14571 // to-be-promoted comparison inputs.
14572 if (PromOp.getOpcode() == ISD::SELECT ||
14573 PromOp.getOpcode() == ISD::SELECT_CC) {
14574 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14575 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14576 (SelectTruncOp[1].count(PromOp.getNode()) &&
14577 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14578 PromOpHandles.emplace_front(PromOp);
14579 continue;
14580 }
14581 }
14582
14583 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14584 PromOp.getNode()->op_end());
14585
14586 // If this node has constant inputs, then they'll need to be promoted here.
14587 for (unsigned i = 0; i < 2; ++i) {
14588 if (!isa<ConstantSDNode>(Ops[C+i]))
14589 continue;
14590 if (Ops[C+i].getValueType() == N->getValueType(0))
14591 continue;
14592
14593 if (N->getOpcode() == ISD::SIGN_EXTEND)
14594 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14595 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14596 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14597 else
14598 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14599 }
14600
14601 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14602 // truncate them again to the original value type.
14603 if (PromOp.getOpcode() == ISD::SELECT ||
14604 PromOp.getOpcode() == ISD::SELECT_CC) {
14605 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14606 if (SI0 != SelectTruncOp[0].end())
14607 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14608 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14609 if (SI1 != SelectTruncOp[1].end())
14610 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14611 }
14612
14613 DAG.ReplaceAllUsesOfValueWith(PromOp,
14614 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14615 }
14616
14617 // Now we're left with the initial extension itself.
14618 if (!ReallyNeedsExt)
14619 return N->getOperand(0);
14620
14621 // To zero extend, just mask off everything except for the first bit (in the
14622 // i1 case).
14623 if (N->getOpcode() == ISD::ZERO_EXTEND)
14624 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14625 DAG.getConstant(APInt::getLowBitsSet(
14626 N->getValueSizeInBits(0), PromBits),
14627 dl, N->getValueType(0)));
14628
14629 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14630 "Invalid extension type");
14631 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14632 SDValue ShiftCst =
14633 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14634 return DAG.getNode(
14635 ISD::SRA, dl, N->getValueType(0),
14636 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14637 ShiftCst);
14638}
14639
14640SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14641 DAGCombinerInfo &DCI) const {
14642 assert(N->getOpcode() == ISD::SETCC &&
14643 "Should be called with a SETCC node");
14644
14645 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14646 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14647 SDValue LHS = N->getOperand(0);
14648 SDValue RHS = N->getOperand(1);
14649
14650 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14651 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14652 LHS.hasOneUse())
14653 std::swap(LHS, RHS);
14654
14655 // x == 0-y --> x+y == 0
14656 // x != 0-y --> x+y != 0
14657 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14658 RHS.hasOneUse()) {
14659 SDLoc DL(N);
14660 SelectionDAG &DAG = DCI.DAG;
14661 EVT VT = N->getValueType(0);
14662 EVT OpVT = LHS.getValueType();
14663 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14664 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14665 }
14666 }
14667
14668 return DAGCombineTruncBoolExt(N, DCI);
14669}
14670
14671// Is this an extending load from an f32 to an f64?
14672static bool isFPExtLoad(SDValue Op) {
14673 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14674 return LD->getExtensionType() == ISD::EXTLOAD &&
14675 Op.getValueType() == MVT::f64;
14676 return false;
14677}
14678
14679/// Reduces the number of fp-to-int conversions when building a vector.
14680///
14681/// If this vector is built out of floating to integer conversions,
14682/// transform it to a vector built out of floating point values followed by a
14683/// single floating to integer conversion of the vector.
14684/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14685/// becomes (fptosi (build_vector ($A, $B, ...)))
14686SDValue PPCTargetLowering::
14687combineElementTruncationToVectorTruncation(SDNode *N,
14688 DAGCombinerInfo &DCI) const {
14689 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14690 "Should be called with a BUILD_VECTOR node");
14691
14692 SelectionDAG &DAG = DCI.DAG;
14693 SDLoc dl(N);
14694
14695 SDValue FirstInput = N->getOperand(0);
14696 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14697 "The input operand must be an fp-to-int conversion.");
14698
14699 // This combine happens after legalization so the fp_to_[su]i nodes are
14700 // already converted to PPCISD nodes.
14701 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14702 if (FirstConversion == PPCISD::FCTIDZ ||
14703 FirstConversion == PPCISD::FCTIDUZ ||
14704 FirstConversion == PPCISD::FCTIWZ ||
14705 FirstConversion == PPCISD::FCTIWUZ) {
14706 bool IsSplat = true;
14707 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14708 FirstConversion == PPCISD::FCTIWUZ;
14709 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14710 SmallVector<SDValue, 4> Ops;
14711 EVT TargetVT = N->getValueType(0);
14712 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14713 SDValue NextOp = N->getOperand(i);
14714 if (NextOp.getOpcode() != PPCISD::MFVSR)
14715 return SDValue();
14716 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14717 if (NextConversion != FirstConversion)
14718 return SDValue();
14719 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14720 // This is not valid if the input was originally double precision. It is
14721 // also not profitable to do unless this is an extending load in which
14722 // case doing this combine will allow us to combine consecutive loads.
14723 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14724 return SDValue();
14725 if (N->getOperand(i) != FirstInput)
14726 IsSplat = false;
14727 }
14728
14729 // If this is a splat, we leave it as-is since there will be only a single
14730 // fp-to-int conversion followed by a splat of the integer. This is better
14731 // for 32-bit and smaller ints and neutral for 64-bit ints.
14732 if (IsSplat)
14733 return SDValue();
14734
14735 // Now that we know we have the right type of node, get its operands
14736 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14737 SDValue In = N->getOperand(i).getOperand(0);
14738 if (Is32Bit) {
14739 // For 32-bit values, we need to add an FP_ROUND node (if we made it
14740 // here, we know that all inputs are extending loads so this is safe).
14741 if (In.isUndef())
14742 Ops.push_back(DAG.getUNDEF(SrcVT));
14743 else {
14744 SDValue Trunc =
14745 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14746 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14747 Ops.push_back(Trunc);
14748 }
14749 } else
14750 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14751 }
14752
14753 unsigned Opcode;
14754 if (FirstConversion == PPCISD::FCTIDZ ||
14755 FirstConversion == PPCISD::FCTIWZ)
14756 Opcode = ISD::FP_TO_SINT;
14757 else
14758 Opcode = ISD::FP_TO_UINT;
14759
14760 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14761 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14762 return DAG.getNode(Opcode, dl, TargetVT, BV);
14763 }
14764 return SDValue();
14765}
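// Editorial illustration (instruction names are indicative only): four f32
// values obtained from extending loads, each converted with a scalar
// fctiwz-style conversion and then assembled into a v4i32, become a single
// v4f32 build_vector followed by one vector ISD::FP_TO_SINT, which can be
// matched to a single VSX vector convert (e.g. xvcvspsxws) instead of four
// scalar conversions plus moves.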
14766
14767/// Reduce the number of loads when building a vector.
14768///
14769/// Building a vector out of multiple loads can be converted to a load
14770/// of the vector type if the loads are consecutive. If the loads are
14771/// consecutive but in descending order, a shuffle is added at the end
14772/// to reorder the vector.
14773static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14774 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14775 "Should be called with a BUILD_VECTOR node");
14776
14777 SDLoc dl(N);
14778
14779 // Return early for non byte-sized types, as they can't be consecutive.
14780 if (!N->getValueType(0).getVectorElementType().isByteSized())
14781 return SDValue();
14782
14783 bool InputsAreConsecutiveLoads = true;
14784 bool InputsAreReverseConsecutive = true;
14785 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14786 SDValue FirstInput = N->getOperand(0);
14787 bool IsRoundOfExtLoad = false;
14788 LoadSDNode *FirstLoad = nullptr;
14789
14790 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14791 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14792 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14793 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14794 }
14795 // Not a build vector of (possibly fp_rounded) loads.
14796 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14797 N->getNumOperands() == 1)
14798 return SDValue();
14799
14800 if (!IsRoundOfExtLoad)
14801 FirstLoad = cast<LoadSDNode>(FirstInput);
14802
14803 SmallVector<LoadSDNode *, 4> InputLoads;
14804 InputLoads.push_back(FirstLoad);
14805 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14806 // If any inputs are fp_round(extload), they all must be.
14807 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14808 return SDValue();
14809
14810 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14811 N->getOperand(i);
14812 if (NextInput.getOpcode() != ISD::LOAD)
14813 return SDValue();
14814
14815 SDValue PreviousInput =
14816 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14817 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14818 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14819
14820 // If any inputs are fp_round(extload), they all must be.
14821 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14822 return SDValue();
14823
14824 // We only care about regular loads. The PPC-specific load intrinsics
14825 // will not lead to a merge opportunity.
14826 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14827 InputsAreConsecutiveLoads = false;
14828 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14829 InputsAreReverseConsecutive = false;
14830
14831 // Exit early if the loads are neither consecutive nor reverse consecutive.
14832 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14833 return SDValue();
14834 InputLoads.push_back(LD2);
14835 }
14836
14837 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14838 "The loads cannot be both consecutive and reverse consecutive.");
14839
14840 SDValue WideLoad;
14841 SDValue ReturnSDVal;
14842 if (InputsAreConsecutiveLoads) {
14843 assert(FirstLoad && "Input needs to be a LoadSDNode.");
14844 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14845 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14846 FirstLoad->getAlign());
14847 ReturnSDVal = WideLoad;
14848 } else if (InputsAreReverseConsecutive) {
14849 LoadSDNode *LastLoad = InputLoads.back();
14850 assert(LastLoad && "Input needs to be a LoadSDNode.");
14851 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14852 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14853 LastLoad->getAlign());
14854 SmallVector<int, 16> Ops;
14855 for (int i = N->getNumOperands() - 1; i >= 0; i--)
14856 Ops.push_back(i);
14857
14858 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14859 DAG.getUNDEF(N->getValueType(0)), Ops);
14860 } else
14861 return SDValue();
14862
14863 for (auto *LD : InputLoads)
14864 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14865 return ReturnSDVal;
14866}
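// Editorial illustration: a build_vector of four i32 loads from consecutive
// addresses A, A+4, A+8 and A+12 becomes a single v4i32 load from A; if the
// operands are instead in descending address order, the wide load is taken
// from the lowest address and a reversing vector_shuffle (<3,2,1,0> for four
// elements) is appended, as done above.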
14867
14868// This function adds the required vector_shuffle needed to get
14869// the elements of the vector extract in the correct position
14870// as specified by the CorrectElems encoding.
14871static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14872 SDValue Input, uint64_t Elems,
14873 uint64_t CorrectElems) {
14874 SDLoc dl(N);
14875
14876 unsigned NumElems = Input.getValueType().getVectorNumElements();
14877 SmallVector<int, 16> ShuffleMask(NumElems, -1);
14878
14879 // Knowing the element indices being extracted from the original
14880 // vector and the order in which they're being inserted, just put
14881 // them at element indices required for the instruction.
14882 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14883 if (DAG.getDataLayout().isLittleEndian())
14884 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14885 else
14886 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14887 CorrectElems = CorrectElems >> 8;
14888 Elems = Elems >> 8;
14889 }
14890
14891 SDValue Shuffle =
14892 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14893 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14894
14895 EVT VT = N->getValueType(0);
14896 SDValue Conv = DAG.getBitcast(VT, Shuffle);
14897
14898 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14899 Input.getValueType().getVectorElementType(),
14900 Input.getValueType().getVectorNumElements());
14901 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14902 DAG.getValueType(ExtVT));
14903}
14904
14905// Look for build vector patterns where input operands come from sign
14906// extended vector_extract elements of specific indices. If the correct indices
14907// aren't used, add a vector shuffle to fix up the indices and create
14908// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14909// during instruction selection.
14910static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14911 // This array encodes the indices that the vector sign extend instructions
14912 // extract from when extending from one type to another for both BE and LE.
14913 // The right nibble of each byte corresponds to the LE indices,
14914 // and the left nibble of each byte corresponds to the BE indices.
14915 // For example: 0x3074B8FC byte->word
14916 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14917 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14918 // For example: 0x000070F8 byte->double word
14919 // For LE: the allowed indices are: 0x0,0x8
14920 // For BE: the allowed indices are: 0x7,0xF
14921 uint64_t TargetElems[] = {
14922 0x3074B8FC, // b->w
14923 0x000070F8, // b->d
14924 0x10325476, // h->w
14925 0x00003074, // h->d
14926 0x00001032, // w->d
14927 };
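  // Editorial worked example: on little endian, a v4i32 BUILD_VECTOR whose
  // operands sign-extend byte extracts 0, 4, 8 and 12 of one v16i8 input
  // accumulates Elems = 0x0004080C (the first operand lands in the most
  // significant of the four used bytes). That matches
  // 0x3074B8FC & 0x0F0F0F0F, so no fix-up shuffle is needed; extracts of
  // bytes 1, 5, 9 and 13 would not match, and addShuffleForVecExtend above
  // would insert the correcting shuffle before the sign extension.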
14928
14929 uint64_t Elems = 0;
14930 int Index;
14931 SDValue Input;
14932
14933 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14934 if (!Op)
14935 return false;
14936 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14937 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14938 return false;
14939
14940 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14941 // of the right width.
14942 SDValue Extract = Op.getOperand(0);
14943 if (Extract.getOpcode() == ISD::ANY_EXTEND)
14944 Extract = Extract.getOperand(0);
14945 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14946 return false;
14947
14948 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14949 if (!ExtOp)
14950 return false;
14951
14952 Index = ExtOp->getZExtValue();
14953 if (Input && Input != Extract.getOperand(0))
14954 return false;
14955
14956 if (!Input)
14957 Input = Extract.getOperand(0);
14958
14959 Elems = Elems << 8;
14960 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14961 Elems |= Index;
14962
14963 return true;
14964 };
14965
14966 // If the build vector operands aren't sign extended vector extracts
14967 // of the same input vector, then return.
14968 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14969 if (!isSExtOfVecExtract(N->getOperand(i))) {
14970 return SDValue();
14971 }
14972 }
14973
14974 // If the vector extract indices are not correct, add the appropriate
14975 // vector_shuffle.
14976 int TgtElemArrayIdx;
14977 int InputSize = Input.getValueType().getScalarSizeInBits();
14978 int OutputSize = N->getValueType(0).getScalarSizeInBits();
14979 if (InputSize + OutputSize == 40)
14980 TgtElemArrayIdx = 0;
14981 else if (InputSize + OutputSize == 72)
14982 TgtElemArrayIdx = 1;
14983 else if (InputSize + OutputSize == 48)
14984 TgtElemArrayIdx = 2;
14985 else if (InputSize + OutputSize == 80)
14986 TgtElemArrayIdx = 3;
14987 else if (InputSize + OutputSize == 96)
14988 TgtElemArrayIdx = 4;
14989 else
14990 return SDValue();
14991
14992 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14993 CorrectElems = DAG.getDataLayout().isLittleEndian()
14994 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14995 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14996 if (Elems != CorrectElems) {
14997 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14998 }
14999
15000 // Regular lowering will catch cases where a shuffle is not needed.
15001 return SDValue();
15002}
15003
15004// Look for the pattern of a load from a narrow width to i128, feeding
15005// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
15006// (LXVRZX). This node represents a zero extending load that will be matched
15007// to the Load VSX Vector Rightmost instructions.
15008static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
15009 SDLoc DL(N);
15010
15011 // This combine is only eligible for a BUILD_VECTOR of v1i128.
15012 if (N->getValueType(0) != MVT::v1i128)
15013 return SDValue();
15014
15015 SDValue Operand = N->getOperand(0);
15016 // Proceed with the transformation if the operand to the BUILD_VECTOR
15017 // is a load instruction.
15018 if (Operand.getOpcode() != ISD::LOAD)
15019 return SDValue();
15020
15021 auto *LD = cast<LoadSDNode>(Operand);
15022 EVT MemoryType = LD->getMemoryVT();
15023
15024 // This transformation is only valid if we are loading either a byte,
15025 // halfword, word, or doubleword.
15026 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15027 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15028
15029 // Ensure that the load from the narrow width is being zero extended to i128.
15030 if (!ValidLDType ||
15031 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15032 LD->getExtensionType() != ISD::EXTLOAD))
15033 return SDValue();
15034
15035 SDValue LoadOps[] = {
15036 LD->getChain(), LD->getBasePtr(),
15037 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15038
15039 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15040 DAG.getVTList(MVT::v1i128, MVT::Other),
15041 LoadOps, MemoryType, LD->getMemOperand());
15042}
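// Editorial note: for example, a zero-extending i32 load feeding a v1i128
// build_vector is replaced by a single PPCISD::LXVRZX node, which loads the
// word directly into the vector register and zeroes the remaining bytes,
// avoiding a separate scalar load and widening sequence.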
15043
15044SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15045 DAGCombinerInfo &DCI) const {
15046 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15047 "Should be called with a BUILD_VECTOR node");
15048
15049 SelectionDAG &DAG = DCI.DAG;
15050 SDLoc dl(N);
15051
15052 if (!Subtarget.hasVSX())
15053 return SDValue();
15054
15055 // The target independent DAG combiner will leave a build_vector of
15056 // float-to-int conversions intact. We can generate MUCH better code for
15057 // a float-to-int conversion of a vector of floats.
15058 SDValue FirstInput = N->getOperand(0);
15059 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15060 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15061 if (Reduced)
15062 return Reduced;
15063 }
15064
15065 // If we're building a vector out of consecutive loads, just load that
15066 // vector type.
15067 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15068 if (Reduced)
15069 return Reduced;
15070
15071 // If we're building a vector out of extended elements from another vector
15072 // we have P9 vector integer extend instructions. The code assumes legal
15073 // input types (i.e. it can't handle things like v4i16) so do not run before
15074 // legalization.
15075 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15076 Reduced = combineBVOfVecSExt(N, DAG);
15077 if (Reduced)
15078 return Reduced;
15079 }
15080
15081 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15082 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15083 // is a load from <valid narrow width> to i128.
15084 if (Subtarget.isISA3_1()) {
15085 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15086 if (BVOfZLoad)
15087 return BVOfZLoad;
15088 }
15089
15090 if (N->getValueType(0) != MVT::v2f64)
15091 return SDValue();
15092
15093 // Looking for:
15094 // (build_vector ([su]int_to_fp (extractelt 0)), ([su]int_to_fp (extractelt 1)))
15095 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15096 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15097 return SDValue();
15098 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15099 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15100 return SDValue();
15101 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15102 return SDValue();
15103
15104 SDValue Ext1 = FirstInput.getOperand(0);
15105 SDValue Ext2 = N->getOperand(1).getOperand(0);
15106 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15107 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15108 return SDValue();
15109
15110 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
15111 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
15112 if (!Ext1Op || !Ext2Op)
15113 return SDValue();
15114 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
15115 Ext1.getOperand(0) != Ext2.getOperand(0))
15116 return SDValue();
15117
15118 int FirstElem = Ext1Op->getZExtValue();
15119 int SecondElem = Ext2Op->getZExtValue();
15120 int SubvecIdx;
15121 if (FirstElem == 0 && SecondElem == 1)
15122 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15123 else if (FirstElem == 2 && SecondElem == 3)
15124 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15125 else
15126 return SDValue();
15127
15128 SDValue SrcVec = Ext1.getOperand(0);
15129 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15130 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15131 return DAG.getNode(NodeType, dl, MVT::v2f64,
15132 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15133}
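// Editorial note: in the v2f64 case handled above, converting elements 0 and
// 1 (or 2 and 3) of a v4i32 source and packing the results is expressed as a
// single PPCISD::[SU]INT_VEC_TO_FP of the appropriate half of the source
// vector rather than two scalar conversions plus a build_vector.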
15134
15135SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15136 DAGCombinerInfo &DCI) const {
15137 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15138 N->getOpcode() == ISD::UINT_TO_FP) &&
15139 "Need an int -> FP conversion node here");
15140
15141 if (useSoftFloat() || !Subtarget.has64BitSupport())
15142 return SDValue();
15143
15144 SelectionDAG &DAG = DCI.DAG;
15145 SDLoc dl(N);
15146 SDValue Op(N, 0);
15147
15148 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15149 // from the hardware.
15150 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15151 return SDValue();
15152 if (!Op.getOperand(0).getValueType().isSimple())
15153 return SDValue();
15154 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15155 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15156 return SDValue();
15157
15158 SDValue FirstOperand(Op.getOperand(0));
15159 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15160 (FirstOperand.getValueType() == MVT::i8 ||
15161 FirstOperand.getValueType() == MVT::i16);
15162 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15163 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15164 bool DstDouble = Op.getValueType() == MVT::f64;
15165 unsigned ConvOp = Signed ?
15166 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15167 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15168 SDValue WidthConst =
15169 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15170 dl, false);
15171 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15172 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15173 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
15174 DAG.getVTList(MVT::f64, MVT::Other),
15175 Ops, MVT::i8, LDN->getMemOperand());
15176 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15177
15178 // For signed conversion, we need to sign-extend the value in the VSR
15179 if (Signed) {
15180 SDValue ExtOps[] = { Ld, WidthConst };
15181 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15182 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15183 } else
15184 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15185 }
15186
15187
15188 // For i32 intermediate values, unfortunately, the conversion functions
15189 // leave the upper 32 bits of the value undefined. Within the set of
15190 // scalar instructions, we have no method for zero- or sign-extending the
15191 // value. Thus, we cannot handle i32 intermediate values here.
15192 if (Op.getOperand(0).getValueType() == MVT::i32)
15193 return SDValue();
15194
15195 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15196 "UINT_TO_FP is supported only with FPCVT");
15197
15198 // If we have FCFIDS, then use it when converting to single-precision.
15199 // Otherwise, convert to double-precision and then round.
15200 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15201 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15202 : PPCISD::FCFIDS)
15203 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15204 : PPCISD::FCFID);
15205 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15206 ? MVT::f32
15207 : MVT::f64;
15208
15209 // If we're converting from a float, to an int, and back to a float again,
15210 // then we don't need the store/load pair at all.
15211 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15212 Subtarget.hasFPCVT()) ||
15213 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15214 SDValue Src = Op.getOperand(0).getOperand(0);
15215 if (Src.getValueType() == MVT::f32) {
15216 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15217 DCI.AddToWorklist(Src.getNode());
15218 } else if (Src.getValueType() != MVT::f64) {
15219 // Make sure that we don't pick up a ppc_fp128 source value.
15220 return SDValue();
15221 }
15222
15223 unsigned FCTOp =
15224 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15225 PPCISD::FCTIDUZ;
15226
15227 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15228 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15229
15230 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15231 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15232 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15233 DCI.AddToWorklist(FP.getNode());
15234 }
15235
15236 return FP;
15237 }
15238
15239 return SDValue();
15240}
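// Editorial illustration (hypothetical C source): a round trip such as
//   double d = (double)(long long)x;   // x is a double
// stays in floating-point registers: the value is converted with an
// fctidz-style operation and converted back with fcfid, with no store and
// reload of the intermediate integer.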
15241
15242// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15243// builtins) into loads with swaps.
15244SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15245 DAGCombinerInfo &DCI) const {
15246 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15247 // load combines.
15248 if (DCI.isBeforeLegalizeOps())
15249 return SDValue();
15250
15251 SelectionDAG &DAG = DCI.DAG;
15252 SDLoc dl(N);
15253 SDValue Chain;
15254 SDValue Base;
15255 MachineMemOperand *MMO;
15256
15257 switch (N->getOpcode()) {
15258 default:
15259 llvm_unreachable("Unexpected opcode for little endian VSX load");
15260 case ISD::LOAD: {
15261 LoadSDNode *LD = cast<LoadSDNode>(N);
15262 Chain = LD->getChain();
15263 Base = LD->getBasePtr();
15264 MMO = LD->getMemOperand();
15265 // If the MMO suggests this isn't a load of a full vector, leave
15266 // things alone. For a built-in, we have to make the change for
15267 // correctness, so if there is a size problem that will be a bug.
15268 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15269 return SDValue();
15270 break;
15271 }
15272 case ISD::INTRINSIC_W_CHAIN: {
15273 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15274 Chain = Intrin->getChain();
15275 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15276 // us what we want. Get operand 2 instead.
15277 Base = Intrin->getOperand(2);
15278 MMO = Intrin->getMemOperand();
15279 break;
15280 }
15281 }
15282
15283 MVT VecTy = N->getValueType(0).getSimpleVT();
15284
15285 SDValue LoadOps[] = { Chain, Base };
15286 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15287 DAG.getVTList(MVT::v2f64, MVT::Other),
15288 LoadOps, MVT::v2f64, MMO);
15289
15290 DCI.AddToWorklist(Load.getNode());
15291 Chain = Load.getValue(1);
15292 SDValue Swap = DAG.getNode(
15293 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15294 DCI.AddToWorklist(Swap.getNode());
15295
15296 // Add a bitcast if the resulting load type doesn't match v2f64.
15297 if (VecTy != MVT::v2f64) {
15298 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15299 DCI.AddToWorklist(N.getNode());
15300 // Package {bitcast value, swap's chain} to match Load's shape.
15301 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15302 N, Swap.getValue(1));
15303 }
15304
15305 return Swap;
15306}
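// Editorial note: on little endian, the 16-byte vector load above becomes a
// PPCISD::LXVD2X load followed by a PPCISD::XXSWAPD, so the doublewords end
// up in the element order the rest of the DAG expects; expandVSXStoreForLE
// below mirrors this with an XXSWAPD followed by a STXVD2X.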
15307
15308// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15309// builtins) into stores with swaps.
15310SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15311 DAGCombinerInfo &DCI) const {
15312 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15313 // store combines.
15314 if (DCI.isBeforeLegalizeOps())
15315 return SDValue();
15316
15317 SelectionDAG &DAG = DCI.DAG;
15318 SDLoc dl(N);
15319 SDValue Chain;
15320 SDValue Base;
15321 unsigned SrcOpnd;
15322 MachineMemOperand *MMO;
15323
15324 switch (N->getOpcode()) {
15325 default:
15326 llvm_unreachable("Unexpected opcode for little endian VSX store");
15327 case ISD::STORE: {
15328 StoreSDNode *ST = cast<StoreSDNode>(N);
15329 Chain = ST->getChain();
15330 Base = ST->getBasePtr();
15331 MMO = ST->getMemOperand();
15332 SrcOpnd = 1;
15333 // If the MMO suggests this isn't a store of a full vector, leave
15334 // things alone. For a built-in, we have to make the change for
15335 // correctness, so if there is a size problem that will be a bug.
15336 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15337 return SDValue();
15338 break;
15339 }
15340 case ISD::INTRINSIC_VOID: {
15341 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15342 Chain = Intrin->getChain();
15343 // Intrin->getBasePtr() oddly does not get what we want.
15344 Base = Intrin->getOperand(3);
15345 MMO = Intrin->getMemOperand();
15346 SrcOpnd = 2;
15347 break;
15348 }
15349 }
15350
15351 SDValue Src = N->getOperand(SrcOpnd);
15352 MVT VecTy = Src.getValueType().getSimpleVT();
15353
15354 // All stores are done as v2f64 and possible bit cast.
15355 if (VecTy != MVT::v2f64) {
15356 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15357 DCI.AddToWorklist(Src.getNode());
15358 }
15359
15360 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15361 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15362 DCI.AddToWorklist(Swap.getNode());
15363 Chain = Swap.getValue(1);
15364 SDValue StoreOps[] = { Chain, Swap, Base };
15365 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15366 DAG.getVTList(MVT::Other),
15367 StoreOps, VecTy, MMO);
15368 DCI.AddToWorklist(Store.getNode());
15369 return Store;
15370}
15371
15372// Handle DAG combine for STORE (FP_TO_INT F).
15373SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15374 DAGCombinerInfo &DCI) const {
15375 SelectionDAG &DAG = DCI.DAG;
15376 SDLoc dl(N);
15377 unsigned Opcode = N->getOperand(1).getOpcode();
15378 (void)Opcode;
15379 bool Strict = N->getOperand(1)->isStrictFPOpcode();
15380
15381 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15382 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15383 && "Not a FP_TO_INT Instruction!");
15384
15385 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15386 EVT Op1VT = N->getOperand(1).getValueType();
15387 EVT ResVT = Val.getValueType();
15388
15389 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15390 return SDValue();
15391
15392 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15393 bool ValidTypeForStoreFltAsInt =
15394 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15395 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15396
15397 // TODO: Lower conversion from f128 on all VSX targets
15398 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15399 return SDValue();
15400
15401 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15402 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15403 return SDValue();
15404
15405 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15406
15407 // Set number of bytes being converted.
15408 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15409 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15410 DAG.getIntPtrConstant(ByteSize, dl, false),
15411 DAG.getValueType(Op1VT)};
15412
15413 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15414 DAG.getVTList(MVT::Other), Ops,
15415 cast<StoreSDNode>(N)->getMemoryVT(),
15416 cast<StoreSDNode>(N)->getMemOperand());
15417
15418 return Val;
15419}
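// Editorial illustration (hypothetical C source): this allows
//   *(int *)p = (int)f;   // f is a double
// to be emitted as a convert-to-integer kept in a VSX register followed by a
// direct integer store from that register, avoiding a move through a GPR.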
15420
15421static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15422 // Check that the source of the element keeps flipping
15423 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
15424 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15425 for (int i = 1, e = Mask.size(); i < e; i++) {
15426 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15427 return false;
15428 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15429 return false;
15430 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15431 }
15432 return true;
15433}
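// Editorial example: for two v8i16 inputs, the mask <0,8,1,9,2,10,3,11>
// alternates between the first vector (indices < 8) and the second
// (indices >= 8), so the check above returns true; <0,1,8,9,2,3,10,11> does
// not alternate and returns false.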
15434
15435static bool isSplatBV(SDValue Op) {
15436 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15437 return false;
15438 SDValue FirstOp;
15439
15440 // Find first non-undef input.
15441 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15442 FirstOp = Op.getOperand(i);
15443 if (!FirstOp.isUndef())
15444 break;
15445 }
15446
15447 // All inputs are undef or the same as the first non-undef input.
15448 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15449 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15450 return false;
15451 return true;
15452}
15453
15455 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15456 return Op;
15457 if (Op.getOpcode() != ISD::BITCAST)
15458 return SDValue();
15459 Op = Op.getOperand(0);
15460 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15461 return Op;
15462 return SDValue();
15463}
15464
15465// Fix up the shuffle mask to account for the fact that the result of
15466// scalar_to_vector is not in lane zero. This just takes all values in
15467// the ranges specified by the min/max indices and adds the number of
15468// elements required to ensure each element comes from the respective
15469// position in the valid lane.
15470// On little endian, that's just the corresponding element in the other
15471// half of the vector. On big endian, it is in the same half but right
15472// justified rather than left justified in that half.
15473static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15474 int LHSMaxIdx, int RHSMinIdx,
15475 int RHSMaxIdx, int HalfVec,
15476 unsigned ValidLaneWidth,
15477 const PPCSubtarget &Subtarget) {
15478 for (int i = 0, e = ShuffV.size(); i < e; i++) {
15479 int Idx = ShuffV[i];
15480 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15481 ShuffV[i] +=
15482 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15483 }
15484}
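// Editorial worked example: for a v4i32 shuffle whose LHS is a (permuted)
// scalar_to_vector, HalfVec is 2 and ValidLaneWidth is 1. A mask entry of 0
// (the scalar) is rewritten to 2 on little endian, the matching element in
// the other half of the permuted vector; on big endian it becomes
// 0 + (2 - 1) = 1, the right-justified element of the left half.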
15485
15486// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15487// the original is:
15488// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15489// In such a case, just change the shuffle mask to extract the element
15490// from the permuted index.
15491static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15492 const PPCSubtarget &Subtarget) {
15493 SDLoc dl(OrigSToV);
15494 EVT VT = OrigSToV.getValueType();
15495 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15496 "Expecting a SCALAR_TO_VECTOR here");
15497 SDValue Input = OrigSToV.getOperand(0);
15498
15499 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15500 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15501 SDValue OrigVector = Input.getOperand(0);
15502
15503 // Can't handle non-const element indices or different vector types
15504 // for the input to the extract and the output of the scalar_to_vector.
15505 if (Idx && VT == OrigVector.getValueType()) {
15506 unsigned NumElts = VT.getVectorNumElements();
15507 assert(
15508 NumElts > 1 &&
15509 "Cannot produce a permuted scalar_to_vector for one element vector");
15510 SmallVector<int, 16> NewMask(NumElts, -1);
15511 unsigned ResultInElt = NumElts / 2;
15512 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15513 NewMask[ResultInElt] = Idx->getZExtValue();
15514 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15515 }
15516 }
15517 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15518 OrigSToV.getOperand(0));
15519}
15520
15521// On little endian subtargets, combine shuffles such as:
15522// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15523// into:
15524// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15525// because the latter can be matched to a single instruction merge.
15526// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15527// to put the value into element zero. Adjust the shuffle mask so that the
15528// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15529// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15530// nodes with elements smaller than doubleword because all the ways
15531// of getting scalar data into a vector register put the value in the
15532// rightmost element of the left half of the vector.
15533SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15534 SelectionDAG &DAG) const {
15535 SDValue LHS = SVN->getOperand(0);
15536 SDValue RHS = SVN->getOperand(1);
15537 auto Mask = SVN->getMask();
15538 int NumElts = LHS.getValueType().getVectorNumElements();
15539 SDValue Res(SVN, 0);
15540 SDLoc dl(SVN);
15541 bool IsLittleEndian = Subtarget.isLittleEndian();
15542
15543 // On big endian targets this is only useful for subtargets with direct moves.
15544 // On little endian targets it would be useful for all subtargets with VSX.
15545 // However adding special handling for LE subtargets without direct moves
15546 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15547 // which includes direct moves.
15548 if (!Subtarget.hasDirectMove())
15549 return Res;
15550
15551 // If this is not a shuffle of a shuffle and the first element comes from
15552 // the second vector, canonicalize to the commuted form. This will make it
15553 // more likely to match one of the single instruction patterns.
15554 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15555 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15556 std::swap(LHS, RHS);
15557 Res = DAG.getCommutedVectorShuffle(*SVN);
15558 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15559 }
15560
15561 // Adjust the shuffle mask if either input vector comes from a
15562 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15563 // form (to prevent the need for a swap).
15564 SmallVector<int, 16> ShuffV(Mask);
15565 SDValue SToVLHS = isScalarToVec(LHS);
15566 SDValue SToVRHS = isScalarToVec(RHS);
15567 if (SToVLHS || SToVRHS) {
15568 // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15569 // same type and have differing element sizes, then do not perform
15570 // the following transformation. The current transformation for
15571 // SCALAR_TO_VECTOR assumes that both input vectors have the same
15572 // element size. This will be updated in the future to account for
15573 // differing sizes of the LHS and RHS.
15574 if (SToVLHS && SToVRHS &&
15575 (SToVLHS.getValueType().getScalarSizeInBits() !=
15576 SToVRHS.getValueType().getScalarSizeInBits()))
15577 return Res;
15578
15579 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15580 : SToVRHS.getValueType().getVectorNumElements();
15581 int NumEltsOut = ShuffV.size();
15582 // The width of the "valid lane" (i.e. the lane that contains the value that
15583 // is vectorized) needs to be expressed in terms of the number of elements
15584 // of the shuffle. It is therefore the ratio of the scalar element sizes
15585 // before and after any bitcast.
15586 unsigned ValidLaneWidth =
15587 SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15588 LHS.getValueType().getScalarSizeInBits()
15589 : SToVRHS.getValueType().getScalarSizeInBits() /
15590 RHS.getValueType().getScalarSizeInBits();
15591
15592 // Initially assume that neither input is permuted. These will be adjusted
15593 // accordingly if either input is.
15594 int LHSMaxIdx = -1;
15595 int RHSMinIdx = -1;
15596 int RHSMaxIdx = -1;
15597 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15598
15599 // Get the permuted scalar to vector nodes for the source(s) that come from
15600 // ISD::SCALAR_TO_VECTOR.
15601 // On big endian systems, this only makes sense for element sizes smaller
15602 // than 64 bits since for 64-bit elements, all instructions already put
15603 // the value into element zero. Since scalar size of LHS and RHS may differ
15604 // after isScalarToVec, this should be checked using their own sizes.
15605 if (SToVLHS) {
15606 if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15607 return Res;
15608 // Set up the values for the shuffle vector fixup.
15609 LHSMaxIdx = NumEltsOut / NumEltsIn;
15610 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15611 if (SToVLHS.getValueType() != LHS.getValueType())
15612 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15613 LHS = SToVLHS;
15614 }
15615 if (SToVRHS) {
15616 if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15617 return Res;
15618 RHSMinIdx = NumEltsOut;
15619 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15620 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15621 if (SToVRHS.getValueType() != RHS.getValueType())
15622 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15623 RHS = SToVRHS;
15624 }
15625
15626 // Fix up the shuffle mask to reflect where the desired element actually is.
15627 // The minimum and maximum indices that correspond to element zero for both
15628 // the LHS and RHS are computed and will control which shuffle mask entries
15629 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15630 // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15631 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15632 HalfVec, ValidLaneWidth, Subtarget);
15633 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15634
15635 // We may have simplified away the shuffle. We won't be able to do anything
15636 // further with it here.
15637 if (!isa<ShuffleVectorSDNode>(Res))
15638 return Res;
15639 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15640 }
15641
15642 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15643 // The common case after we commuted the shuffle is that the RHS is a splat
15644 // and we have elements coming in from the splat at indices that are not
15645 // conducive to using a merge.
15646 // Example:
15647 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15648 if (!isSplatBV(TheSplat))
15649 return Res;
15650
15651 // We are looking for a mask such that all even elements are from
15652 // one vector and all odd elements from the other.
15653 if (!isAlternatingShuffMask(Mask, NumElts))
15654 return Res;
15655
15656 // Adjust the mask so we are pulling in the same index from the splat
15657 // as the index from the interesting vector in consecutive elements.
15658 if (IsLittleEndian) {
15659 // Example (even elements from first vector):
15660 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15661 if (Mask[0] < NumElts)
15662 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15663 if (ShuffV[i] < 0)
15664 continue;
15665 // If element from non-splat is undef, pick first element from splat.
15666 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15667 }
15668 // Example (odd elements from first vector):
15669 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15670 else
15671 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15672 if (ShuffV[i] < 0)
15673 continue;
15674 // If element from non-splat is undef, pick first element from splat.
15675 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
15676 }
15677 } else {
15678 // Example (even elements from first vector):
15679 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15680 if (Mask[0] < NumElts)
15681 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15682 if (ShuffV[i] < 0)
15683 continue;
15684 // If element from non-splat is undef, pick first element from splat.
15685 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
15686 }
15687 // Example (odd elements from first vector):
15688 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15689 else
15690 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15691 if (ShuffV[i] < 0)
15692 continue;
15693 // If element from non-splat is undef, pick first element from splat.
15694 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
15695 }
15696 }
15697
15698 // If the RHS has undefs, we need to remove them since we may have created
15699 // a shuffle that adds those instead of the splat value.
15700 SDValue SplatVal =
15701 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15702 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15703
15704 if (IsLittleEndian)
15705 RHS = TheSplat;
15706 else
15707 LHS = TheSplat;
15708 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15709}
15710
15711SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15712 LSBaseSDNode *LSBase,
15713 DAGCombinerInfo &DCI) const {
15714 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15715 "Not a reverse memop pattern!");
15716
15717 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15718 auto Mask = SVN->getMask();
15719 int i = 0;
15720 auto I = Mask.rbegin();
15721 auto E = Mask.rend();
15722
15723 for (; I != E; ++I) {
15724 if (*I != i)
15725 return false;
15726 i++;
15727 }
15728 return true;
15729 };
15730
15731 SelectionDAG &DAG = DCI.DAG;
15732 EVT VT = SVN->getValueType(0);
15733
15734 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15735 return SDValue();
15736
15737 // Before Power9, the PPCVSXSwapRemoval pass is used to fix up the element order.
15738 // See the comment in PPCVSXSwapRemoval.cpp.
15739 // This combine conflicts with that optimization, so we don't do it here.
15740 if (!Subtarget.hasP9Vector())
15741 return SDValue();
15742
15743 if (!IsElementReverse(SVN))
15744 return SDValue();
15745
15746 if (LSBase->getOpcode() == ISD::LOAD) {
15747 // If result 0 of the load has any user other than the
15748 // shufflevector instruction, it is not profitable to replace the
15749 // shufflevector with a reverse load.
15750 for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15751 UI != UE; ++UI)
15752 if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15753 return SDValue();
15754
15755 SDLoc dl(LSBase);
15756 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15757 return DAG.getMemIntrinsicNode(
15758 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15759 LSBase->getMemoryVT(), LSBase->getMemOperand());
15760 }
15761
15762 if (LSBase->getOpcode() == ISD::STORE) {
15763 // If there are other uses of the shuffle, the swap cannot be avoided.
15764 // Forcing the use of an X-Form (since swapped stores only have
15765 // X-Forms) without removing the swap is unprofitable.
15766 if (!SVN->hasOneUse())
15767 return SDValue();
15768
15769 SDLoc dl(LSBase);
15770 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15771 LSBase->getBasePtr()};
15772 return DAG.getMemIntrinsicNode(
15773 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15774 LSBase->getMemoryVT(), LSBase->getMemOperand());
15775 }
15776
15777 llvm_unreachable("Expected a load or store node here");
15778}
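// Illustrative patterns the combine above matches (assuming a little endian
// Power9 subtarget and v4i32):
//   (vector_shuffle<3,2,1,0> (load %ptr))      -> (PPCISD::LOAD_VEC_BE %ptr)
//   (store (vector_shuffle<3,2,1,0> %v), %ptr) -> (PPCISD::STORE_VEC_BE %v, %ptr)
// The element reversal is folded into a byte-order-adjusted VSX memory access
// instead of being emitted as a separate permute instruction.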
15779
15780static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15781 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
15782 if (IntrinsicID == Intrinsic::ppc_stdcx)
15783 StoreWidth = 8;
15784 else if (IntrinsicID == Intrinsic::ppc_stwcx)
15785 StoreWidth = 4;
15786 else if (IntrinsicID == Intrinsic::ppc_sthcx)
15787 StoreWidth = 2;
15788 else if (IntrinsicID == Intrinsic::ppc_stbcx)
15789 StoreWidth = 1;
15790 else
15791 return false;
15792 return true;
15793}
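// Illustrative use (see the ISD::BR_CC combine further down): a branch on the
// result of @llvm.ppc.stwcx(%ptr, %val) compared (seteq/setne) against 0 or 1
// reports StoreWidth == 4 here; the combine then branches directly on CR0
// instead of moving the stwcx. result into a GPR and comparing it.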
15794
15795SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15796 DAGCombinerInfo &DCI) const {
15797 SelectionDAG &DAG = DCI.DAG;
15798 SDLoc dl(N);
15799 switch (N->getOpcode()) {
15800 default: break;
15801 case ISD::ADD:
15802 return combineADD(N, DCI);
15803 case ISD::AND: {
15804 // We don't want (and (zext (shift...)), C) if C fits in the width of the
15805 // original input as that will prevent us from selecting optimal rotates.
15806 // This only matters if the input to the extend is i32 widened to i64.
15807 SDValue Op1 = N->getOperand(0);
15808 SDValue Op2 = N->getOperand(1);
15809 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15810 Op1.getOpcode() != ISD::ANY_EXTEND) ||
15811 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15812 Op1.getOperand(0).getValueType() != MVT::i32)
15813 break;
15814 SDValue NarrowOp = Op1.getOperand(0);
15815 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15816 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15817 break;
15818
15819 uint64_t Imm = Op2->getAsZExtVal();
15820 // Make sure that the constant is narrow enough to fit in the narrow type.
15821 if (!isUInt<32>(Imm))
15822 break;
15823 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15824 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15825 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15826 }
15827 case ISD::SHL:
15828 return combineSHL(N, DCI);
15829 case ISD::SRA:
15830 return combineSRA(N, DCI);
15831 case ISD::SRL:
15832 return combineSRL(N, DCI);
15833 case ISD::MUL:
15834 return combineMUL(N, DCI);
15835 case ISD::FMA:
15836 case PPCISD::FNMSUB:
15837 return combineFMALike(N, DCI);
15838 case PPCISD::SHL:
15839 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15840 return N->getOperand(0);
15841 break;
15842 case PPCISD::SRL:
15843 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15844 return N->getOperand(0);
15845 break;
15846 case PPCISD::SRA:
15847 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15848 if (C->isZero() || // 0 >>s V -> 0.
15849 C->isAllOnes()) // -1 >>s V -> -1.
15850 return N->getOperand(0);
15851 }
15852 break;
15853 case ISD::SIGN_EXTEND:
15854 case ISD::ZERO_EXTEND:
15855 case ISD::ANY_EXTEND:
15856 return DAGCombineExtBoolTrunc(N, DCI);
15857 case ISD::TRUNCATE:
15858 return combineTRUNCATE(N, DCI);
15859 case ISD::SETCC:
15860 if (SDValue CSCC = combineSetCC(N, DCI))
15861 return CSCC;
15862 [[fallthrough]];
15863 case ISD::SELECT_CC:
15864 return DAGCombineTruncBoolExt(N, DCI);
15865 case ISD::SINT_TO_FP:
15866 case ISD::UINT_TO_FP:
15867 return combineFPToIntToFP(N, DCI);
15868 case ISD::VECTOR_SHUFFLE:
15869 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15870 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15871 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15872 }
15873 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15874 case ISD::STORE: {
15875
15876 EVT Op1VT = N->getOperand(1).getValueType();
15877 unsigned Opcode = N->getOperand(1).getOpcode();
15878
15879 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15880 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
15881 SDValue Val = combineStoreFPToInt(N, DCI);
15882 if (Val)
15883 return Val;
15884 }
15885
15886 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15887 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15888 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15889 if (Val)
15890 return Val;
15891 }
15892
15893 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15894 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15895 N->getOperand(1).getNode()->hasOneUse() &&
15896 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15897 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15898
15899 // STBRX can only handle simple types, and it makes no sense to store fewer
15900 // than two bytes in byte-reversed order.
15901 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15902 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15903 break;
15904
15905 SDValue BSwapOp = N->getOperand(1).getOperand(0);
15906 // Do an any-extend to 32-bits if this is a half-word input.
15907 if (BSwapOp.getValueType() == MVT::i16)
15908 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15909
15910 // If the type of the BSWAP operand is wider than the stored memory width,
15911 // it needs to be shifted right before STBRX.
15912 if (Op1VT.bitsGT(mVT)) {
15913 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15914 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15915 DAG.getConstant(Shift, dl, MVT::i32));
15916 // Need to truncate if this is a bswap of i64 stored as i32/i16.
15917 if (Op1VT == MVT::i64)
15918 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15919 }
15920
15921 SDValue Ops[] = {
15922 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15923 };
15924 return
15925 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15926 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15927 cast<StoreSDNode>(N)->getMemOperand());
15928 }
15929
15930 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15931 // This increases the chance of CSE'ing the constant materialization.
15932 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15933 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15934 // Need to sign-extend to 64 bits to handle negative values.
15935 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15936 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15937 MemVT.getSizeInBits());
15938 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15939
15940 // DAG.getTruncStore() can't be used here because it doesn't accept
15941 // the general (base + offset) addressing mode.
15942 // So we use UpdateNodeOperands and setTruncatingStore instead.
15943 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15944 N->getOperand(3));
15945 cast<StoreSDNode>(N)->setTruncatingStore(true);
15946 return SDValue(N, 0);
15947 }
15948
15949 // For little endian, VSX stores require generating xxswapd/lxvd2x.
15950 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15951 if (Op1VT.isSimple()) {
15952 MVT StoreVT = Op1VT.getSimpleVT();
15953 if (Subtarget.needsSwapsForVSXMemOps() &&
15954 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15955 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15956 return expandVSXStoreForLE(N, DCI);
15957 }
15958 break;
15959 }
15960 case ISD::LOAD: {
15961 LoadSDNode *LD = cast<LoadSDNode>(N);
15962 EVT VT = LD->getValueType(0);
15963
15964 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15965 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15966 if (VT.isSimple()) {
15967 MVT LoadVT = VT.getSimpleVT();
15968 if (Subtarget.needsSwapsForVSXMemOps() &&
15969 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15970 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15971 return expandVSXLoadForLE(N, DCI);
15972 }
15973
15974 // We sometimes end up with a 64-bit integer load, from which we extract
15975 // two single-precision floating-point numbers. This happens with
15976 // std::complex<float>, and other similar structures, because of the way we
15977 // canonicalize structure copies. However, if we lack direct moves,
15978 // then the final bitcasts from the extracted integer values to the
15979 // floating-point numbers turn into store/load pairs. Even with direct moves,
15980 // just loading the two floating-point numbers is likely better.
15981 auto ReplaceTwoFloatLoad = [&]() {
15982 if (VT != MVT::i64)
15983 return false;
15984
15985 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15986 LD->isVolatile())
15987 return false;
15988
15989 // We're looking for a sequence like this:
15990 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15991 // t16: i64 = srl t13, Constant:i32<32>
15992 // t17: i32 = truncate t16
15993 // t18: f32 = bitcast t17
15994 // t19: i32 = truncate t13
15995 // t20: f32 = bitcast t19
15996
15997 if (!LD->hasNUsesOfValue(2, 0))
15998 return false;
15999
16000 auto UI = LD->use_begin();
16001 while (UI.getUse().getResNo() != 0) ++UI;
16002 SDNode *Trunc = *UI++;
16003 while (UI.getUse().getResNo() != 0) ++UI;
16004 SDNode *RightShift = *UI;
16005 if (Trunc->getOpcode() != ISD::TRUNCATE)
16006 std::swap(Trunc, RightShift);
16007
16008 if (Trunc->getOpcode() != ISD::TRUNCATE ||
16009 Trunc->getValueType(0) != MVT::i32 ||
16010 !Trunc->hasOneUse())
16011 return false;
16012 if (RightShift->getOpcode() != ISD::SRL ||
16013 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
16014 RightShift->getConstantOperandVal(1) != 32 ||
16015 !RightShift->hasOneUse())
16016 return false;
16017
16018 SDNode *Trunc2 = *RightShift->use_begin();
16019 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16020 Trunc2->getValueType(0) != MVT::i32 ||
16021 !Trunc2->hasOneUse())
16022 return false;
16023
16024 SDNode *Bitcast = *Trunc->use_begin();
16025 SDNode *Bitcast2 = *Trunc2->use_begin();
16026
16027 if (Bitcast->getOpcode() != ISD::BITCAST ||
16028 Bitcast->getValueType(0) != MVT::f32)
16029 return false;
16030 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16031 Bitcast2->getValueType(0) != MVT::f32)
16032 return false;
16033
16034 if (Subtarget.isLittleEndian())
16035 std::swap(Bitcast, Bitcast2);
16036
16037 // Bitcast has the second float (in memory-layout order) and Bitcast2
16038 // has the first one.
16039
16040 SDValue BasePtr = LD->getBasePtr();
16041 if (LD->isIndexed()) {
16042 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16043 "Non-pre-inc AM on PPC?");
16044 BasePtr =
16045 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16046 LD->getOffset());
16047 }
16048
16049 auto MMOFlags =
16050 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16051 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
16052 LD->getPointerInfo(), LD->getAlign(),
16053 MMOFlags, LD->getAAInfo());
16054 SDValue AddPtr =
16055 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
16056 BasePtr, DAG.getIntPtrConstant(4, dl));
16057 SDValue FloatLoad2 = DAG.getLoad(
16058 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
16059 LD->getPointerInfo().getWithOffset(4),
16060 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
16061
16062 if (LD->isIndexed()) {
16063 // Note that DAGCombine should re-form any pre-increment load(s) from
16064 // what is produced here if that makes sense.
16065 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
16066 }
16067
16068 DCI.CombineTo(Bitcast2, FloatLoad);
16069 DCI.CombineTo(Bitcast, FloatLoad2);
16070
16071 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
16072 SDValue(FloatLoad2.getNode(), 1));
16073 return true;
16074 };
16075
16076 if (ReplaceTwoFloatLoad())
16077 return SDValue(N, 0);
16078
16079 EVT MemVT = LD->getMemoryVT();
16080 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
16081 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16082 if (LD->isUnindexed() && VT.isVector() &&
16083 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16084 // P8 and later hardware should just use LOAD.
16085 !Subtarget.hasP8Vector() &&
16086 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16087 VT == MVT::v4f32))) &&
16088 LD->getAlign() < ABIAlignment) {
16089 // This is a type-legal unaligned Altivec load.
16090 SDValue Chain = LD->getChain();
16091 SDValue Ptr = LD->getBasePtr();
16092 bool isLittleEndian = Subtarget.isLittleEndian();
16093
16094 // This implements the loading of unaligned vectors as described in
16095 // the venerable Apple Velocity Engine overview. Specifically:
16096 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16097 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16098 //
16099 // The general idea is to expand a sequence of one or more unaligned
16100 // loads into an alignment-based permutation-control instruction (lvsl
16101 // or lvsr), a series of regular vector loads (which always truncate
16102 // their input address to an aligned address), and a series of
16103 // permutations. The results of these permutations are the requested
16104 // loaded values. The trick is that the last "extra" load is not taken
16105 // from the address you might suspect (sizeof(vector) bytes after the
16106 // last requested load), but rather sizeof(vector) - 1 bytes after the
16107 // last requested vector. The point of this is to avoid a page fault if
16108 // the base address happened to be aligned. This works because if the
16109 // base address is aligned, then adding less than a full vector length
16110 // will cause the last vector in the sequence to be (re)loaded.
16111 // Otherwise, the next vector will be fetched as you might suspect was
16112 // necessary.
16113
16114 // We might be able to reuse the permutation generation from
16115 // a different base address offset from this one by an aligned amount.
16116 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16117 // optimization later.
16118 Intrinsic::ID Intr, IntrLD, IntrPerm;
16119 MVT PermCntlTy, PermTy, LDTy;
16120 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16121 : Intrinsic::ppc_altivec_lvsl;
16122 IntrLD = Intrinsic::ppc_altivec_lvx;
16123 IntrPerm = Intrinsic::ppc_altivec_vperm;
16124 PermCntlTy = MVT::v16i8;
16125 PermTy = MVT::v4i32;
16126 LDTy = MVT::v4i32;
16127
16128 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16129
16130 // Create the new MMO for the new base load. It is like the original MMO,
16131 // but represents an area in memory almost twice the vector size centered
16132 // on the original address. If the address is unaligned, we might start
16133 // reading up to (sizeof(vector)-1) bytes below the address of the
16134 // original unaligned load.
16135 MachineFunction &MF = DAG.getMachineFunction();
16136 MachineMemOperand *BaseMMO =
16137 MF.getMachineMemOperand(LD->getMemOperand(),
16138 -(int64_t)MemVT.getStoreSize()+1,
16139 2*MemVT.getStoreSize()-1);
16140
16141 // Create the new base load.
16142 SDValue LDXIntID =
16143 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16144 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16145 SDValue BaseLoad =
16146 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16147 DAG.getVTList(PermTy, MVT::Other),
16148 BaseLoadOps, LDTy, BaseMMO);
16149
16150 // Note that the value of IncOffset (which is provided to the next
16151 // load's pointer info offset value, and thus used to calculate the
16152 // alignment), and the value of IncValue (which is actually used to
16153 // increment the pointer value) are different! This is because we
16154 // require the next load to appear to be aligned, even though it
16155 // is actually offset from the base pointer by a lesser amount.
16156 int IncOffset = VT.getSizeInBits() / 8;
16157 int IncValue = IncOffset;
16158
16159 // Walk (both up and down) the chain looking for another load at the real
16160 // (aligned) offset (the alignment of the other load does not matter in
16161 // this case). If found, then do not use the offset reduction trick, as
16162 // that will prevent the loads from being later combined (as they would
16163 // otherwise be duplicates).
16164 if (!findConsecutiveLoad(LD, DAG))
16165 --IncValue;
16166
16167 SDValue Increment =
16168 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16169 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16170
16171 MachineMemOperand *ExtraMMO =
16172 MF.getMachineMemOperand(LD->getMemOperand(),
16173 1, 2*MemVT.getStoreSize()-1);
16174 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16175 SDValue ExtraLoad =
16176 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16177 DAG.getVTList(PermTy, MVT::Other),
16178 ExtraLoadOps, LDTy, ExtraMMO);
16179
16180 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16181 BaseLoad.getValue(1), ExtraLoad.getValue(1));
16182
16183 // Because vperm has a big-endian bias, we must reverse the order
16184 // of the input vectors and complement the permute control vector
16185 // when generating little endian code. We have already handled the
16186 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16187 // and ExtraLoad here.
16188 SDValue Perm;
16189 if (isLittleEndian)
16190 Perm = BuildIntrinsicOp(IntrPerm,
16191 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16192 else
16193 Perm = BuildIntrinsicOp(IntrPerm,
16194 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16195
16196 if (VT != PermTy)
16197 Perm = Subtarget.hasAltivec()
16198 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16199 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16200 DAG.getTargetConstant(1, dl, MVT::i64));
16201 // second argument is 1 because this rounding
16202 // is always exact.
16203
16204 // The output of the permutation is our loaded result, the TokenFactor is
16205 // our new chain.
16206 DCI.CombineTo(N, Perm, TF);
16207 return SDValue(N, 0);
16208 }
16209 }
16210 break;
16211 case ISD::INTRINSIC_WO_CHAIN: {
16212 bool isLittleEndian = Subtarget.isLittleEndian();
16213 unsigned IID = N->getConstantOperandVal(0);
16214 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16215 : Intrinsic::ppc_altivec_lvsl);
16216 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16217 SDValue Add = N->getOperand(1);
16218
16219 int Bits = 4 /* 16 byte alignment */;
16220
16221 if (DAG.MaskedValueIsZero(Add->getOperand(1),
16222 APInt::getAllOnes(Bits /* alignment */)
16223 .zext(Add.getScalarValueSizeInBits()))) {
16224 SDNode *BasePtr = Add->getOperand(0).getNode();
16225 for (SDNode *U : BasePtr->uses()) {
16226 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16227 U->getConstantOperandVal(0) == IID) {
16228 // We've found another LVSL/LVSR, and this address is an aligned
16229 // multiple of that one. The results will be the same, so use the
16230 // one we've just found instead.
16231
16232 return SDValue(U, 0);
16233 }
16234 }
16235 }
16236
16237 if (isa<ConstantSDNode>(Add->getOperand(1))) {
16238 SDNode *BasePtr = Add->getOperand(0).getNode();
16239 for (SDNode *U : BasePtr->uses()) {
16240 if (U->getOpcode() == ISD::ADD &&
16241 isa<ConstantSDNode>(U->getOperand(1)) &&
16242 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16243 (1ULL << Bits) ==
16244 0) {
16245 SDNode *OtherAdd = U;
16246 for (SDNode *V : OtherAdd->uses()) {
16247 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16248 V->getConstantOperandVal(0) == IID) {
16249 return SDValue(V, 0);
16250 }
16251 }
16252 }
16253 }
16254 }
16255 }
16256
16257 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
16258 // Expose the vabsduw/h/b opportunity for downstream combines.
16259 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16260 (IID == Intrinsic::ppc_altivec_vmaxsw ||
16261 IID == Intrinsic::ppc_altivec_vmaxsh ||
16262 IID == Intrinsic::ppc_altivec_vmaxsb)) {
16263 SDValue V1 = N->getOperand(1);
16264 SDValue V2 = N->getOperand(2);
16265 if ((V1.getSimpleValueType() == MVT::v4i32 ||
16266 V1.getSimpleValueType() == MVT::v8i16 ||
16267 V1.getSimpleValueType() == MVT::v16i8) &&
16268 V1.getSimpleValueType() == V2.getSimpleValueType()) {
16269 // (0-a, a)
16270 if (V1.getOpcode() == ISD::SUB &&
16271 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16272 V1.getOperand(1) == V2) {
16273 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16274 }
16275 // (a, 0-a)
16276 if (V2.getOpcode() == ISD::SUB &&
16277 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16278 V2.getOperand(1) == V1) {
16279 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16280 }
16281 // (x-y, y-x)
16282 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16283 V1.getOperand(0) == V2.getOperand(1) &&
16284 V1.getOperand(1) == V2.getOperand(0)) {
16285 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16286 }
16287 }
16288 }
16289 }
16290
16291 break;
16292 case ISD::INTRINSIC_W_CHAIN:
16293 switch (N->getConstantOperandVal(1)) {
16294 default:
16295 break;
16296 case Intrinsic::ppc_altivec_vsum4sbs:
16297 case Intrinsic::ppc_altivec_vsum4shs:
16298 case Intrinsic::ppc_altivec_vsum4ubs: {
16299 // These sum-across intrinsics only have a chain due to the side effect
16300 // that they may set the SAT bit. If we know the SAT bit will not be set
16301 // for some inputs, we can replace any uses of their chain with the
16302 // input chain.
16303 if (BuildVectorSDNode *BVN =
16304 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16305 APInt APSplatBits, APSplatUndef;
16306 unsigned SplatBitSize;
16307 bool HasAnyUndefs;
16308 bool BVNIsConstantSplat = BVN->isConstantSplat(
16309 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16310 !Subtarget.isLittleEndian());
16311 // If the constant splat vector is 0, the SAT bit will not be set.
16312 if (BVNIsConstantSplat && APSplatBits == 0)
16313 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16314 }
16315 return SDValue();
16316 }
16317 case Intrinsic::ppc_vsx_lxvw4x:
16318 case Intrinsic::ppc_vsx_lxvd2x:
16319 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16320 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16321 if (Subtarget.needsSwapsForVSXMemOps())
16322 return expandVSXLoadForLE(N, DCI);
16323 break;
16324 }
16325 break;
16326 case ISD::INTRINSIC_VOID:
16327 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16328 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16329 if (Subtarget.needsSwapsForVSXMemOps()) {
16330 switch (N->getConstantOperandVal(1)) {
16331 default:
16332 break;
16333 case Intrinsic::ppc_vsx_stxvw4x:
16334 case Intrinsic::ppc_vsx_stxvd2x:
16335 return expandVSXStoreForLE(N, DCI);
16336 }
16337 }
16338 break;
16339 case ISD::BSWAP: {
16340 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16341 // For subtargets without LDBRX, we can still do better than the default
16342 // expansion even for 64-bit BSWAP (LOAD).
16343 bool Is64BitBswapOn64BitTgt =
16344 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16345 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16346 N->getOperand(0).hasOneUse();
16347 if (IsSingleUseNormalLd &&
16348 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16349 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16350 SDValue Load = N->getOperand(0);
16351 LoadSDNode *LD = cast<LoadSDNode>(Load);
16352 // Create the byte-swapping load.
16353 SDValue Ops[] = {
16354 LD->getChain(), // Chain
16355 LD->getBasePtr(), // Ptr
16356 DAG.getValueType(N->getValueType(0)) // VT
16357 };
16358 SDValue BSLoad =
16359 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16360 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16361 MVT::i64 : MVT::i32, MVT::Other),
16362 Ops, LD->getMemoryVT(), LD->getMemOperand());
16363
16364 // If this is an i16 load, insert the truncate.
16365 SDValue ResVal = BSLoad;
16366 if (N->getValueType(0) == MVT::i16)
16367 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16368
16369 // First, combine the bswap away. This makes the value produced by the
16370 // load dead.
16371 DCI.CombineTo(N, ResVal);
16372
16373 // Next, combine the load away; we give it a bogus result value but a real
16374 // chain result. The result value is dead because the bswap is dead.
16375 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16376
16377 // Return N so it doesn't get rechecked!
16378 return SDValue(N, 0);
16379 }
16380 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16381 // before legalization so that the BUILD_PAIR is handled correctly.
16382 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16383 !IsSingleUseNormalLd)
16384 return SDValue();
16385 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16386
16387 // Can't split volatile or atomic loads.
16388 if (!LD->isSimple())
16389 return SDValue();
16390 SDValue BasePtr = LD->getBasePtr();
16391 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16392 LD->getPointerInfo(), LD->getAlign());
16393 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16394 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16395 DAG.getIntPtrConstant(4, dl));
16396 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16397 LD->getMemOperand(), 4, 4);
16398 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16399 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16400 SDValue Res;
16401 if (Subtarget.isLittleEndian())
16402 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16403 else
16404 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16405 SDValue TF =
16406 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16407 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16408 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16409 return Res;
16410 }
16411 case PPCISD::VCMP:
16412 // If a VCMP_rec node already exists with exactly the same operands as this
16413 // node, use its result instead of this node (VCMP_rec computes both a CR6
16414 // and a normal output).
16415 //
16416 if (!N->getOperand(0).hasOneUse() &&
16417 !N->getOperand(1).hasOneUse() &&
16418 !N->getOperand(2).hasOneUse()) {
16419
16420 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16421 SDNode *VCMPrecNode = nullptr;
16422
16423 SDNode *LHSN = N->getOperand(0).getNode();
16424 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16425 UI != E; ++UI)
16426 if (UI->getOpcode() == PPCISD::VCMP_rec &&
16427 UI->getOperand(1) == N->getOperand(1) &&
16428 UI->getOperand(2) == N->getOperand(2) &&
16429 UI->getOperand(0) == N->getOperand(0)) {
16430 VCMPrecNode = *UI;
16431 break;
16432 }
16433
16434 // If there is no VCMP_rec node, or if the flag value has a single use,
16435 // don't transform this.
16436 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16437 break;
16438
16439 // Look at the (necessarily single) use of the flag value. If it has a
16440 // chain, this transformation is more complex. Note that multiple things
16441 // could use the value result, which we should ignore.
16442 SDNode *FlagUser = nullptr;
16443 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16444 FlagUser == nullptr; ++UI) {
16445 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16446 SDNode *User = *UI;
16447 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16448 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16449 FlagUser = User;
16450 break;
16451 }
16452 }
16453 }
16454
16455 // If the user is a MFOCRF instruction, we know this is safe.
16456 // Otherwise we give up for right now.
16457 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16458 return SDValue(VCMPrecNode, 0);
16459 }
16460 break;
16461 case ISD::BR_CC: {
16462 // If this is a branch on an altivec predicate comparison, lower this so
16463 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
16464 // lowering is done pre-legalize, because the legalizer lowers the predicate
16465 // compare down to code that is difficult to reassemble.
16466 // This code also handles branches that depend on the result of a store
16467 // conditional.
16468 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16469 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16470
16471 int CompareOpc;
16472 bool isDot;
16473
16474 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16475 break;
16476
16477 // Since we are doing this pre-legalize, the RHS can be a constant of
16478 // arbitrary bitwidth which may cause issues when trying to get the value
16479 // from the underlying APInt.
16480 auto RHSAPInt = RHS->getAsAPIntVal();
16481 if (!RHSAPInt.isIntN(64))
16482 break;
16483
16484 unsigned Val = RHSAPInt.getZExtValue();
16485 auto isImpossibleCompare = [&]() {
16486 // If this is a comparison against something other than 0/1, then we know
16487 // that the condition is never/always true.
16488 if (Val != 0 && Val != 1) {
16489 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16490 return N->getOperand(0);
16491 // Always !=, turn it into an unconditional branch.
16492 return DAG.getNode(ISD::BR, dl, MVT::Other,
16493 N->getOperand(0), N->getOperand(4));
16494 }
16495 return SDValue();
16496 };
16497 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16498 unsigned StoreWidth = 0;
16499 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16500 isStoreConditional(LHS, StoreWidth)) {
16501 if (SDValue Impossible = isImpossibleCompare())
16502 return Impossible;
16503 PPC::Predicate CompOpc;
16504 // eq 0 => ne
16505 // ne 0 => eq
16506 // eq 1 => eq
16507 // ne 1 => ne
16508 if (Val == 0)
16509 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16510 else
16511 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16512
16513 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16514 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16515 auto *MemNode = cast<MemSDNode>(LHS);
16516 SDValue ConstSt = DAG.getMemIntrinsicNode(
16517 PPCISD::STORE_COND, dl,
16518 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16519 MemNode->getMemoryVT(), MemNode->getMemOperand());
16520
16521 SDValue InChain;
16522 // Unchain the branch from the original store conditional.
16523 if (N->getOperand(0) == LHS.getValue(1))
16524 InChain = LHS.getOperand(0);
16525 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16526 SmallVector<SDValue, 4> InChains;
16527 SDValue InTF = N->getOperand(0);
16528 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16529 if (InTF.getOperand(i) != LHS.getValue(1))
16530 InChains.push_back(InTF.getOperand(i));
16531 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16532 }
16533
16534 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16535 DAG.getConstant(CompOpc, dl, MVT::i32),
16536 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16537 ConstSt.getValue(2));
16538 }
16539
16540 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16541 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16542 assert(isDot && "Can't compare against a vector result!");
16543
16544 if (SDValue Impossible = isImpossibleCompare())
16545 return Impossible;
16546
16547 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16548 // Create the PPCISD altivec 'dot' comparison node.
16549 SDValue Ops[] = {
16550 LHS.getOperand(2), // LHS of compare
16551 LHS.getOperand(3), // RHS of compare
16552 DAG.getConstant(CompareOpc, dl, MVT::i32)
16553 };
16554 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16555 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16556
16557 // Unpack the result based on how the target uses it.
16558 PPC::Predicate CompOpc;
16559 switch (LHS.getConstantOperandVal(1)) {
16560 default: // Can't happen, don't crash on invalid number though.
16561 case 0: // Branch on the value of the EQ bit of CR6.
16562 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16563 break;
16564 case 1: // Branch on the inverted value of the EQ bit of CR6.
16565 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16566 break;
16567 case 2: // Branch on the value of the LT bit of CR6.
16568 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16569 break;
16570 case 3: // Branch on the inverted value of the LT bit of CR6.
16571 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16572 break;
16573 }
16574
16575 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16576 DAG.getConstant(CompOpc, dl, MVT::i32),
16577 DAG.getRegister(PPC::CR6, MVT::i32),
16578 N->getOperand(4), CompNode.getValue(1));
16579 }
16580 break;
16581 }
16582 case ISD::BUILD_VECTOR:
16583 return DAGCombineBuildVector(N, DCI);
16584 }
16585
16586 return SDValue();
16587}
16588
16589SDValue
16590PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16591 SelectionDAG &DAG,
16592 SmallVectorImpl<SDNode *> &Created) const {
16593 // fold (sdiv X, pow2)
16594 EVT VT = N->getValueType(0);
16595 if (VT == MVT::i64 && !Subtarget.isPPC64())
16596 return SDValue();
16597 if ((VT != MVT::i32 && VT != MVT::i64) ||
16598 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16599 return SDValue();
16600
16601 SDLoc DL(N);
16602 SDValue N0 = N->getOperand(0);
16603
16604 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16605 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16606 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16607
16608 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16609 Created.push_back(Op.getNode());
16610
16611 if (IsNegPow2) {
16612 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16613 Created.push_back(Op.getNode());
16614 }
16615
16616 return Op;
16617}
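// Illustrative folds produced by BuildSDIVPow2 above (assuming i32):
//   (sdiv %x, 4)  -> (PPCISD::SRA_ADDZE %x, 2)
//   (sdiv %x, -4) -> (sub 0, (PPCISD::SRA_ADDZE %x, 2))
// SRA_ADDZE is later matched to an arithmetic shift right (srawi/sradi)
// followed by addze, which adds back the carry so the result is rounded
// toward zero as signed division requires.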
16618
16619//===----------------------------------------------------------------------===//
16620// Inline Assembly Support
16621//===----------------------------------------------------------------------===//
16622
16623void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16624 KnownBits &Known,
16625 const APInt &DemandedElts,
16626 const SelectionDAG &DAG,
16627 unsigned Depth) const {
16628 Known.resetAll();
16629 switch (Op.getOpcode()) {
16630 default: break;
16631 case PPCISD::LBRX: {
16632 // lhbrx is known to have the top bits cleared out.
16633 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16634 Known.Zero = 0xFFFF0000;
16635 break;
16636 }
16637 case ISD::INTRINSIC_WO_CHAIN: {
16638 switch (Op.getConstantOperandVal(0)) {
16639 default: break;
16640 case Intrinsic::ppc_altivec_vcmpbfp_p:
16641 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16642 case Intrinsic::ppc_altivec_vcmpequb_p:
16643 case Intrinsic::ppc_altivec_vcmpequh_p:
16644 case Intrinsic::ppc_altivec_vcmpequw_p:
16645 case Intrinsic::ppc_altivec_vcmpequd_p:
16646 case Intrinsic::ppc_altivec_vcmpequq_p:
16647 case Intrinsic::ppc_altivec_vcmpgefp_p:
16648 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16649 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16650 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16651 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16652 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16653 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16654 case Intrinsic::ppc_altivec_vcmpgtub_p:
16655 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16656 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16657 case Intrinsic::ppc_altivec_vcmpgtud_p:
16658 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16659 Known.Zero = ~1U; // All bits but the low one are known to be zero.
16660 break;
16661 }
16662 break;
16663 }
16664 case ISD::INTRINSIC_W_CHAIN: {
16665 switch (Op.getConstantOperandVal(1)) {
16666 default:
16667 break;
16668 case Intrinsic::ppc_load2r:
16669 // Top bits are cleared for load2r (which is the same as lhbrx).
16670 Known.Zero = 0xFFFF0000;
16671 break;
16672 }
16673 break;
16674 }
16675 }
16676}
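// Illustrative consequence of the known-bits handling above: the result of a
// predicate intrinsic such as
//   %r = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> %a, <4 x i32> %b)
// is known to have every bit except the lowest cleared, so later combines can
// drop masks like (and %r, 1) and simplify comparisons of %r against 0 or 1.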
16677
16678Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16679 switch (Subtarget.getCPUDirective()) {
16680 default: break;
16681 case PPC::DIR_970:
16682 case PPC::DIR_PWR4:
16683 case PPC::DIR_PWR5:
16684 case PPC::DIR_PWR5X:
16685 case PPC::DIR_PWR6:
16686 case PPC::DIR_PWR6X:
16687 case PPC::DIR_PWR7:
16688 case PPC::DIR_PWR8:
16689 case PPC::DIR_PWR9:
16690 case PPC::DIR_PWR10:
16691 case PPC::DIR_PWR11:
16692 case PPC::DIR_PWR_FUTURE: {
16693 if (!ML)
16694 break;
16695
16696 if (!DisableInnermostLoopAlign32) {
16697 // If the nested loop is an innermost loop, prefer a 32-byte alignment
16698 // so that we can decrease cache misses and branch-prediction misses.
16699 // Actual alignment of the loop will depend on the hotness check and other
16700 // logic in alignBlocks.
16701 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16702 return Align(32);
16703 }
16704
16705 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16706
16707 // For small loops (between 5 and 8 instructions), align to a 32-byte
16708 // boundary so that the entire loop fits in one instruction-cache line.
16709 uint64_t LoopSize = 0;
16710 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16711 for (const MachineInstr &J : **I) {
16712 LoopSize += TII->getInstSizeInBytes(J);
16713 if (LoopSize > 32)
16714 break;
16715 }
16716
16717 if (LoopSize > 16 && LoopSize <= 32)
16718 return Align(32);
16719
16720 break;
16721 }
16722 }
16723
16724 return TargetLowering::getPrefLoopAlignment(ML);
16725}
16726
16727/// getConstraintType - Given a constraint, return the type of
16728/// constraint it is for this target.
16729PPCTargetLowering::ConstraintType
16730PPCTargetLowering::getConstraintType(StringRef Constraint) const {
16731 if (Constraint.size() == 1) {
16732 switch (Constraint[0]) {
16733 default: break;
16734 case 'b':
16735 case 'r':
16736 case 'f':
16737 case 'd':
16738 case 'v':
16739 case 'y':
16740 return C_RegisterClass;
16741 case 'Z':
16742 // FIXME: While Z does indicate a memory constraint, it specifically
16743 // indicates an r+r address (used in conjunction with the 'y' modifier
16744 // in the replacement string). Currently, we're forcing the base
16745 // register to be r0 in the asm printer (which is interpreted as zero)
16746 // and forming the complete address in the second register. This is
16747 // suboptimal.
16748 return C_Memory;
16749 }
16750 } else if (Constraint == "wc") { // individual CR bits.
16751 return C_RegisterClass;
16752 } else if (Constraint == "wa" || Constraint == "wd" ||
16753 Constraint == "wf" || Constraint == "ws" ||
16754 Constraint == "wi" || Constraint == "ww") {
16755 return C_RegisterClass; // VSX registers.
16756 }
16757 return TargetLowering::getConstraintType(Constraint);
16758}
16759
16760/// Examine constraint type and operand type and determine a weight value.
16761/// This object must already have been set up with the operand type
16762/// and the current alternative constraint selected.
16763TargetLowering::ConstraintWeight
16764PPCTargetLowering::getSingleConstraintMatchWeight(
16765 AsmOperandInfo &info, const char *constraint) const {
16766 ConstraintWeight weight = CW_Invalid;
16767 Value *CallOperandVal = info.CallOperandVal;
16768 // If we don't have a value, we can't do a match,
16769 // but allow it at the lowest weight.
16770 if (!CallOperandVal)
16771 return CW_Default;
16772 Type *type = CallOperandVal->getType();
16773
16774 // Look at the constraint type.
16775 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16776 return CW_Register; // an individual CR bit.
16777 else if ((StringRef(constraint) == "wa" ||
16778 StringRef(constraint) == "wd" ||
16779 StringRef(constraint) == "wf") &&
16780 type->isVectorTy())
16781 return CW_Register;
16782 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16783 return CW_Register; // just holds 64-bit integer data.
16784 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16785 return CW_Register;
16786 else if (StringRef(constraint) == "ww" && type->isFloatTy())
16787 return CW_Register;
16788
16789 switch (*constraint) {
16790 default:
16791 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16792 break;
16793 case 'b':
16794 if (type->isIntegerTy())
16795 weight = CW_Register;
16796 break;
16797 case 'f':
16798 if (type->isFloatTy())
16799 weight = CW_Register;
16800 break;
16801 case 'd':
16802 if (type->isDoubleTy())
16803 weight = CW_Register;
16804 break;
16805 case 'v':
16806 if (type->isVectorTy())
16807 weight = CW_Register;
16808 break;
16809 case 'y':
16810 weight = CW_Register;
16811 break;
16812 case 'Z':
16813 weight = CW_Memory;
16814 break;
16815 }
16816 return weight;
16817}
16818
16819std::pair<unsigned, const TargetRegisterClass *>
16820PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16821 StringRef Constraint,
16822 MVT VT) const {
16823 if (Constraint.size() == 1) {
16824 // GCC RS6000 Constraint Letters
16825 switch (Constraint[0]) {
16826 case 'b': // R1-R31
16827 if (VT == MVT::i64 && Subtarget.isPPC64())
16828 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16829 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16830 case 'r': // R0-R31
16831 if (VT == MVT::i64 && Subtarget.isPPC64())
16832 return std::make_pair(0U, &PPC::G8RCRegClass);
16833 return std::make_pair(0U, &PPC::GPRCRegClass);
16834 // 'd' and 'f' constraints are both defined to be "the floating point
16835 // registers", where one is for 32-bit and the other for 64-bit. We don't
16836 // really care overly much here so just give them all the same reg classes.
16837 case 'd':
16838 case 'f':
16839 if (Subtarget.hasSPE()) {
16840 if (VT == MVT::f32 || VT == MVT::i32)
16841 return std::make_pair(0U, &PPC::GPRCRegClass);
16842 if (VT == MVT::f64 || VT == MVT::i64)
16843 return std::make_pair(0U, &PPC::SPERCRegClass);
16844 } else {
16845 if (VT == MVT::f32 || VT == MVT::i32)
16846 return std::make_pair(0U, &PPC::F4RCRegClass);
16847 if (VT == MVT::f64 || VT == MVT::i64)
16848 return std::make_pair(0U, &PPC::F8RCRegClass);
16849 }
16850 break;
16851 case 'v':
16852 if (Subtarget.hasAltivec() && VT.isVector())
16853 return std::make_pair(0U, &PPC::VRRCRegClass);
16854 else if (Subtarget.hasVSX())
16855 // Scalars in Altivec registers only make sense with VSX.
16856 return std::make_pair(0U, &PPC::VFRCRegClass);
16857 break;
16858 case 'y': // crrc
16859 return std::make_pair(0U, &PPC::CRRCRegClass);
16860 }
16861 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16862 // An individual CR bit.
16863 return std::make_pair(0U, &PPC::CRBITRCRegClass);
16864 } else if ((Constraint == "wa" || Constraint == "wd" ||
16865 Constraint == "wf" || Constraint == "wi") &&
16866 Subtarget.hasVSX()) {
16867 // A VSX register for either a scalar (FP) or vector. There is no
16868 // support for single precision scalars on subtargets prior to Power8.
16869 if (VT.isVector())
16870 return std::make_pair(0U, &PPC::VSRCRegClass);
16871 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16872 return std::make_pair(0U, &PPC::VSSRCRegClass);
16873 return std::make_pair(0U, &PPC::VSFRCRegClass);
16874 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16875 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16876 return std::make_pair(0U, &PPC::VSSRCRegClass);
16877 else
16878 return std::make_pair(0U, &PPC::VSFRCRegClass);
16879 } else if (Constraint == "lr") {
16880 if (VT == MVT::i64)
16881 return std::make_pair(0U, &PPC::LR8RCRegClass);
16882 else
16883 return std::make_pair(0U, &PPC::LRRCRegClass);
16884 }
16885
16886 // Handle special cases of physical registers that are not properly handled
16887 // by the base class.
16888 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16889 // If we name a VSX register, we can't defer to the base class because it
16890 // will not recognize the correct register (their names will be VSL{0-31}
16891 // and V{0-31} so they won't match). So we match them here.
16892 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16893 int VSNum = atoi(Constraint.data() + 3);
16894 assert(VSNum >= 0 && VSNum <= 63 &&
16895 "Attempted to access a vsr out of range");
16896 if (VSNum < 32)
16897 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16898 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16899 }
16900
16901 // For float registers, we can't defer to the base class as it will match
16902 // the SPILLTOVSRRC class.
16903 if (Constraint.size() > 3 && Constraint[1] == 'f') {
16904 int RegNum = atoi(Constraint.data() + 2);
16905 if (RegNum > 31 || RegNum < 0)
16906 report_fatal_error("Invalid floating point register number");
16907 if (VT == MVT::f32 || VT == MVT::i32)
16908 return Subtarget.hasSPE()
16909 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16910 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16911 if (VT == MVT::f64 || VT == MVT::i64)
16912 return Subtarget.hasSPE()
16913 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16914 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16915 }
16916 }
16917
16918 std::pair<unsigned, const TargetRegisterClass *> R =
16919 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16920
16921 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16922 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16923 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16924 // register.
16925 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16926 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16927 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16928 PPC::GPRCRegClass.contains(R.first))
16929 return std::make_pair(TRI->getMatchingSuperReg(R.first,
16930 PPC::sub_32, &PPC::G8RCRegClass),
16931 &PPC::G8RCRegClass);
16932
16933 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16934 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16935 R.first = PPC::CR0;
16936 R.second = &PPC::CRRCRegClass;
16937 }
16938 // FIXME: This warning should ideally be emitted in the front end.
16939 const auto &TM = getTargetMachine();
16940 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16941 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16942 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16943 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16944 errs() << "warning: vector registers 20 to 31 are reserved in the "
16945 "default AIX AltiVec ABI and cannot be used\n";
16946 }
16947
16948 return R;
16949}
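// Illustrative constraint lookups (assuming a Power8 little endian subtarget):
//   "r"  with MVT::i64   -> G8RCRegClass
//   "f"  with MVT::f64   -> F8RCRegClass
//   "wa" with MVT::v4i32 -> VSRCRegClass
//   "{vs34}"             -> (PPC::V2, VSRCRegClass), since vs34 aliases v2
//   "{r5}" with MVT::i64 -> upgraded to the 64-bit super-register X5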
16950
16951/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16952/// vector. If it is invalid, don't add anything to Ops.
16953void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16954 StringRef Constraint,
16955 std::vector<SDValue> &Ops,
16956 SelectionDAG &DAG) const {
16957 SDValue Result;
16958
16959 // Only support length 1 constraints.
16960 if (Constraint.size() > 1)
16961 return;
16962
16963 char Letter = Constraint[0];
16964 switch (Letter) {
16965 default: break;
16966 case 'I':
16967 case 'J':
16968 case 'K':
16969 case 'L':
16970 case 'M':
16971 case 'N':
16972 case 'O':
16973 case 'P': {
16974 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16975 if (!CST) return; // Must be an immediate to match.
16976 SDLoc dl(Op);
16977 int64_t Value = CST->getSExtValue();
16978 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16979 // numbers are printed as such.
16980 switch (Letter) {
16981 default: llvm_unreachable("Unknown constraint letter!");
16982 case 'I': // "I" is a signed 16-bit constant.
16983 if (isInt<16>(Value))
16984 Result = DAG.getTargetConstant(Value, dl, TCVT);
16985 break;
16986 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
16987 if (isShiftedUInt<16, 16>(Value))
16988 Result = DAG.getTargetConstant(Value, dl, TCVT);
16989 break;
16990 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
16991 if (isShiftedInt<16, 16>(Value))
16992 Result = DAG.getTargetConstant(Value, dl, TCVT);
16993 break;
16994 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
16995 if (isUInt<16>(Value))
16996 Result = DAG.getTargetConstant(Value, dl, TCVT);
16997 break;
16998 case 'M': // "M" is a constant that is greater than 31.
16999 if (Value > 31)
17000 Result = DAG.getTargetConstant(Value, dl, TCVT);
17001 break;
17002 case 'N': // "N" is a positive constant that is an exact power of two.
17003 if (Value > 0 && isPowerOf2_64(Value))
17004 Result = DAG.getTargetConstant(Value, dl, TCVT);
17005 break;
17006 case 'O': // "O" is the constant zero.
17007 if (Value == 0)
17008 Result = DAG.getTargetConstant(Value, dl, TCVT);
17009 break;
17010 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
17011 if (isInt<16>(-Value))
17012 Result = DAG.getTargetConstant(Value, dl, TCVT);
17013 break;
17014 }
17015 break;
17016 }
17017 }
17018
17019 if (Result.getNode()) {
17020 Ops.push_back(Result);
17021 return;
17022 }
17023
17024 // Handle standard constraint letters.
17025 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17026}
17027
17028void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17029 SmallVectorImpl<SDValue> &Ops,
17030 SelectionDAG &DAG) const {
17031 if (I.getNumOperands() <= 1)
17032 return;
17033 if (!isa<ConstantSDNode>(Ops[1].getNode()))
17034 return;
17035 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17036 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17037 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17038 return;
17039
17040 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
17041 Ops.push_back(DAG.getMDNode(MDN));
17042}
17043
17044// isLegalAddressingMode - Return true if the addressing mode represented
17045// by AM is legal for this target, for a load/store of the specified type.
17046bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17047 const AddrMode &AM, Type *Ty,
17048 unsigned AS,
17049 Instruction *I) const {
17050 // Vector type r+i form is supported since Power9 as DQ form. We don't check
17051 // that the offset matches the DQ form requirement (off % 16 == 0), because on
17052 // PowerPC the imm form is preferred and the offset can be adjusted to use the
17053 // imm form later in the PPCLoopInstrFormPrep pass. Also, in LSR, each LSRUse
17054 // checks only its min and max offsets against the legal addressing modes, so
17055 // we should be a little aggressive here to accommodate its other offsets.
17056 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17057 return false;
17058
17059 // PPC allows a sign-extended 16-bit immediate field.
17060 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17061 return false;
17062
17063 // No global is ever allowed as a base.
17064 if (AM.BaseGV)
17065 return false;
17066
17067 // PPC only supports r+r,
17068 switch (AM.Scale) {
17069 case 0: // "r+i" or just "i", depending on HasBaseReg.
17070 break;
17071 case 1:
17072 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
17073 return false;
17074 // Otherwise we have r+r or r+i.
17075 break;
17076 case 2:
17077 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
17078 return false;
17079 // Allow 2*r as r+r.
17080 break;
17081 default:
17082 // No other scales are supported.
17083 return false;
17084 }
17085
17086 return true;
17087}
17088
17089SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17090 SelectionDAG &DAG) const {
17091 MachineFunction &MF = DAG.getMachineFunction();
17092 MachineFrameInfo &MFI = MF.getFrameInfo();
17093 MFI.setReturnAddressIsTaken(true);
17094
17095 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
17096 return SDValue();
17097
17098 SDLoc dl(Op);
17099 unsigned Depth = Op.getConstantOperandVal(0);
17100
17101 // Make sure the function does not optimize away the store of the RA to
17102 // the stack.
17103 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17104 FuncInfo->setLRStoreRequired();
17105 bool isPPC64 = Subtarget.isPPC64();
17106 auto PtrVT = getPointerTy(MF.getDataLayout());
17107
17108 if (Depth > 0) {
17109 // The link register (return address) is saved in the caller's frame
17110 // not the callee's stack frame. So we must get the caller's frame
17111 // address and load the return address at the LR offset from there.
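// For example, a non-zero depth first computes the frame address for that
// depth and then loads the saved LR from getReturnSaveOffset() bytes past it.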
17112 SDValue FrameAddr =
17113 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17114 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
17115 SDValue Offset =
17116 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
17117 isPPC64 ? MVT::i64 : MVT::i32);
17118 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
17119 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
17120 MachinePointerInfo());
17121 }
17122
17123 // Just load the return address off the stack.
17124 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17125 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17126 MachinePointerInfo());
17127}
17128
17129SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17130 SelectionDAG &DAG) const {
17131 SDLoc dl(Op);
17132 unsigned Depth = Op.getConstantOperandVal(0);
17133
17134 MachineFunction &MF = DAG.getMachineFunction();
17135 MachineFrameInfo &MFI = MF.getFrameInfo();
17136 MFI.setFrameAddressIsTaken(true);
17137
17138 EVT PtrVT = getPointerTy(MF.getDataLayout());
17139 bool isPPC64 = PtrVT == MVT::i64;
17140
17141 // Naked functions never have a frame pointer, and so we use r1. For all
17142 // other functions, this decision must be delayed until PEI.
17143 unsigned FrameReg;
17144 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17145 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17146 else
17147 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17148
17149 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17150 PtrVT);
17151 while (Depth--)
17152 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17153 FrameAddr, MachinePointerInfo());
17154 return FrameAddr;
17155}
17156
17157// FIXME? Maybe this could be a TableGen attribute on some registers and
17158// this table could be generated automatically from RegInfo.
17159Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
17160 const MachineFunction &MF) const {
17161 bool isPPC64 = Subtarget.isPPC64();
17162
17163 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
17164 if (!is64Bit && VT != LLT::scalar(32))
17165 report_fatal_error("Invalid register global variable type");
17166
17167 Register Reg = StringSwitch<Register>(RegName)
17168 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
17169 .Case("r2", isPPC64 ? Register() : PPC::R2)
17170 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
17171 .Default(Register());
17172
17173 if (Reg)
17174 return Reg;
17175 report_fatal_error("Invalid register name global variable");
17176}
17177
17178bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
17179 // The 32-bit SVR4 ABI accesses everything as got-indirect.
17180 if (Subtarget.is32BitELFABI())
17181 return true;
17182
17183 // AIX accesses everything indirectly through the TOC, which is similar to
17184 // the GOT.
17185 if (Subtarget.isAIXABI())
17186 return true;
17187
17188 CodeModel::Model CModel = getTargetMachine().getCodeModel();
17189 // If it is small or large code model, module locals are accessed
17190 // indirectly by loading their address from .toc/.got.
17191 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17192 return true;
17193
17194 // JumpTable and BlockAddress are accessed as got-indirect.
17195 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
17196 return true;
17197
17198 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
17199 return Subtarget.isGVIndirectSymbol(G->getGlobal());
17200
17201 return false;
17202}
17203
17204bool
17205PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
17206 // The PowerPC target isn't yet aware of offsets.
17207 return false;
17208}
17209
17210bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
17211 const CallInst &I,
17212 MachineFunction &MF,
17213 unsigned Intrinsic) const {
17214 switch (Intrinsic) {
17215 case Intrinsic::ppc_atomicrmw_xchg_i128:
17216 case Intrinsic::ppc_atomicrmw_add_i128:
17217 case Intrinsic::ppc_atomicrmw_sub_i128:
17218 case Intrinsic::ppc_atomicrmw_nand_i128:
17219 case Intrinsic::ppc_atomicrmw_and_i128:
17220 case Intrinsic::ppc_atomicrmw_or_i128:
17221 case Intrinsic::ppc_atomicrmw_xor_i128:
17222 case Intrinsic::ppc_cmpxchg_i128:
17223 Info.opc = ISD::INTRINSIC_W_CHAIN;
17224 Info.memVT = MVT::i128;
17225 Info.ptrVal = I.getArgOperand(0);
17226 Info.offset = 0;
17227 Info.align = Align(16);
17228 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
17229 MachineMemOperand::MOVolatile;
17230 return true;
17231 case Intrinsic::ppc_atomic_load_i128:
17232 Info.opc = ISD::INTRINSIC_W_CHAIN;
17233 Info.memVT = MVT::i128;
17234 Info.ptrVal = I.getArgOperand(0);
17235 Info.offset = 0;
17236 Info.align = Align(16);
17237 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
17238 return true;
17239 case Intrinsic::ppc_atomic_store_i128:
17240 Info.opc = ISD::INTRINSIC_VOID;
17241 Info.memVT = MVT::i128;
17242 Info.ptrVal = I.getArgOperand(2);
17243 Info.offset = 0;
17244 Info.align = Align(16);
17245 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17246 return true;
17247 case Intrinsic::ppc_altivec_lvx:
17248 case Intrinsic::ppc_altivec_lvxl:
17249 case Intrinsic::ppc_altivec_lvebx:
17250 case Intrinsic::ppc_altivec_lvehx:
17251 case Intrinsic::ppc_altivec_lvewx:
17252 case Intrinsic::ppc_vsx_lxvd2x:
17253 case Intrinsic::ppc_vsx_lxvw4x:
17254 case Intrinsic::ppc_vsx_lxvd2x_be:
17255 case Intrinsic::ppc_vsx_lxvw4x_be:
17256 case Intrinsic::ppc_vsx_lxvl:
17257 case Intrinsic::ppc_vsx_lxvll: {
17258 EVT VT;
17259 switch (Intrinsic) {
17260 case Intrinsic::ppc_altivec_lvebx:
17261 VT = MVT::i8;
17262 break;
17263 case Intrinsic::ppc_altivec_lvehx:
17264 VT = MVT::i16;
17265 break;
17266 case Intrinsic::ppc_altivec_lvewx:
17267 VT = MVT::i32;
17268 break;
17269 case Intrinsic::ppc_vsx_lxvd2x:
17270 case Intrinsic::ppc_vsx_lxvd2x_be:
17271 VT = MVT::v2f64;
17272 break;
17273 default:
17274 VT = MVT::v4i32;
17275 break;
17276 }
17277
17278 Info.opc = ISD::INTRINSIC_W_CHAIN;
17279 Info.memVT = VT;
17280 Info.ptrVal = I.getArgOperand(0);
17281 Info.offset = -VT.getStoreSize()+1;
17282 Info.size = 2*VT.getStoreSize()-1;
17283 Info.align = Align(1);
17284 Info.flags = MachineMemOperand::MOLoad;
17285 return true;
17286 }
17287 case Intrinsic::ppc_altivec_stvx:
17288 case Intrinsic::ppc_altivec_stvxl:
17289 case Intrinsic::ppc_altivec_stvebx:
17290 case Intrinsic::ppc_altivec_stvehx:
17291 case Intrinsic::ppc_altivec_stvewx:
17292 case Intrinsic::ppc_vsx_stxvd2x:
17293 case Intrinsic::ppc_vsx_stxvw4x:
17294 case Intrinsic::ppc_vsx_stxvd2x_be:
17295 case Intrinsic::ppc_vsx_stxvw4x_be:
17296 case Intrinsic::ppc_vsx_stxvl:
17297 case Intrinsic::ppc_vsx_stxvll: {
17298 EVT VT;
17299 switch (Intrinsic) {
17300 case Intrinsic::ppc_altivec_stvebx:
17301 VT = MVT::i8;
17302 break;
17303 case Intrinsic::ppc_altivec_stvehx:
17304 VT = MVT::i16;
17305 break;
17306 case Intrinsic::ppc_altivec_stvewx:
17307 VT = MVT::i32;
17308 break;
17309 case Intrinsic::ppc_vsx_stxvd2x:
17310 case Intrinsic::ppc_vsx_stxvd2x_be:
17311 VT = MVT::v2f64;
17312 break;
17313 default:
17314 VT = MVT::v4i32;
17315 break;
17316 }
17317
17318 Info.opc = ISD::INTRINSIC_VOID;
17319 Info.memVT = VT;
17320 Info.ptrVal = I.getArgOperand(1);
17321 Info.offset = -VT.getStoreSize()+1;
17322 Info.size = 2*VT.getStoreSize()-1;
17323 Info.align = Align(1);
17324 Info.flags = MachineMemOperand::MOStore;
17325 return true;
17326 }
17327 case Intrinsic::ppc_stdcx:
17328 case Intrinsic::ppc_stwcx:
17329 case Intrinsic::ppc_sthcx:
17330 case Intrinsic::ppc_stbcx: {
17331 EVT VT;
17332 auto Alignment = Align(8);
17333 switch (Intrinsic) {
17334 case Intrinsic::ppc_stdcx:
17335 VT = MVT::i64;
17336 break;
17337 case Intrinsic::ppc_stwcx:
17338 VT = MVT::i32;
17339 Alignment = Align(4);
17340 break;
17341 case Intrinsic::ppc_sthcx:
17342 VT = MVT::i16;
17343 Alignment = Align(2);
17344 break;
17345 case Intrinsic::ppc_stbcx:
17346 VT = MVT::i8;
17347 Alignment = Align(1);
17348 break;
17349 }
17350 Info.opc = ISD::INTRINSIC_W_CHAIN;
17351 Info.memVT = VT;
17352 Info.ptrVal = I.getArgOperand(0);
17353 Info.offset = 0;
17354 Info.align = Alignment;
17355 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17356 return true;
17357 }
17358 default:
17359 break;
17360 }
17361
17362 return false;
17363}
17364
17365/// It returns EVT::Other if the type should be determined using generic
17366/// target-independent logic.
17367EVT PPCTargetLowering::getOptimalMemOpType(
17368 const MemOp &Op, const AttributeList &FuncAttributes) const {
17369 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17370 // We should use Altivec/VSX loads and stores when available. For unaligned
17371 // addresses, unaligned VSX loads are only fast starting with the P8.
17372 if (Subtarget.hasAltivec() && Op.size() >= 16) {
17373 if (Op.isMemset() && Subtarget.hasVSX()) {
17374 uint64_t TailSize = Op.size() % 16;
17375 // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
17376 // element if the vector element type matches the tail store. For tail
17377 // sizes of 3/4 the tail store is i32; v4i32 cannot be used, so use v8i16.
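// For example, an 18-byte memset (TailSize == 2) still returns v4i32, while a
// 20-byte memset (TailSize == 4) returns v8i16.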
17378 if (TailSize > 2 && TailSize <= 4) {
17379 return MVT::v8i16;
17380 }
17381 return MVT::v4i32;
17382 }
17383 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17384 return MVT::v4i32;
17385 }
17386 }
17387
17388 if (Subtarget.isPPC64()) {
17389 return MVT::i64;
17390 }
17391
17392 return MVT::i32;
17393}
17394
17395/// Returns true if it is beneficial to convert a load of a constant
17396/// to just the constant itself.
17397bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17398 Type *Ty) const {
17399 assert(Ty->isIntegerTy());
17400
17401 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17402 return !(BitSize == 0 || BitSize > 64);
17403}
17404
17405bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
17406 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17407 return false;
17408 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17409 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17410 return NumBits1 == 64 && NumBits2 == 32;
17411}
17412
17413bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
17414 if (!VT1.isInteger() || !VT2.isInteger())
17415 return false;
17416 unsigned NumBits1 = VT1.getSizeInBits();
17417 unsigned NumBits2 = VT2.getSizeInBits();
17418 return NumBits1 == 64 && NumBits2 == 32;
17419}
17420
17421bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17422 // Generally speaking, zexts are not free, but they are free when they can be
17423 // folded with other operations.
17424 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17425 EVT MemVT = LD->getMemoryVT();
17426 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17427 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17428 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17429 LD->getExtensionType() == ISD::ZEXTLOAD))
17430 return true;
17431 }
17432
17433 // FIXME: Add other cases...
17434 // - 32-bit shifts with a zext to i64
17435 // - zext after ctlz, bswap, etc.
17436 // - zext after and by a constant mask
17437
17438 return TargetLowering::isZExtFree(Val, VT2);
17439}
17440
17441bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17442 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17443 "invalid fpext types");
17444 // Extending to float128 is not free.
17445 if (DestVT == MVT::f128)
17446 return false;
17447 return true;
17448}
17449
17450bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17451 return isInt<16>(Imm) || isUInt<16>(Imm);
17452}
17453
17454bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17455 return isInt<16>(Imm) || isUInt<16>(Imm);
17456}
17457
17458bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
17459 MachineMemOperand::Flags,
17460 unsigned *Fast) const {
17461 if (DisablePPCUnaligned)
17462 return false;
17463
17464 // PowerPC supports unaligned memory access for simple non-vector types.
17465 // Although accessing unaligned addresses is not as efficient as accessing
17466 // aligned addresses, it is generally more efficient than manual expansion,
17467 // and it generally traps (requiring software handling) only when crossing
17468 // page boundaries.
17469
17470 if (!VT.isSimple())
17471 return false;
17472
17473 if (VT.isFloatingPoint() && !VT.isVector() &&
17474 !Subtarget.allowsUnalignedFPAccess())
17475 return false;
17476
17477 if (VT.getSimpleVT().isVector()) {
17478 if (Subtarget.hasVSX()) {
17479 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17480 VT != MVT::v4f32 && VT != MVT::v4i32)
17481 return false;
17482 } else {
17483 return false;
17484 }
17485 }
17486
17487 if (VT == MVT::ppcf128)
17488 return false;
17489
17490 if (Fast)
17491 *Fast = 1;
17492
17493 return true;
17494}
17495
17496bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17497 SDValue C) const {
17498 // Check integral scalar types.
17499 if (!VT.isScalarInteger())
17500 return false;
17501 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17502 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17503 return false;
17504 // This transformation will generate >= 2 operations. But the following
17505 // cases will generate <= 2 instructions during ISEL. So exclude them.
17506 // 1. If the constant multiplier fits in 16 bits, it can be handled by one
17507 // HW instruction, i.e. MULLI.
17508 // 2. If the multiplier, after shifting out trailing zeros, fits in 16 bits,
17509 // one extra shift instruction is needed compared to case 1, i.e. MULLI + RLDICR.
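// For example, a multiply by 40 (5 << 3) returns false here (the shifted value
// 5 fits in 16 bits and ISel handles it), while a multiply by 65537 (2^16 + 1)
// returns true so the combiner can use a shift and an add instead.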
17510 int64_t Imm = ConstNode->getSExtValue();
17511 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17512 Imm >>= Shift;
17513 if (isInt<16>(Imm))
17514 return false;
17515 uint64_t UImm = static_cast<uint64_t>(Imm);
17516 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17517 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17518 return true;
17519 }
17520 return false;
17521}
17522
17524 EVT VT) const {
17527}
17528
17530 Type *Ty) const {
17531 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17532 return false;
17533 switch (Ty->getScalarType()->getTypeID()) {
17534 case Type::FloatTyID:
17535 case Type::DoubleTyID:
17536 return true;
17537 case Type::FP128TyID:
17538 return Subtarget.hasP9Vector();
17539 default:
17540 return false;
17541 }
17542}
17543
17544// FIXME: add more patterns which are not profitable to hoist.
17545bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17546 if (!I->hasOneUse())
17547 return true;
17548
17549 Instruction *User = I->user_back();
17550 assert(User && "A single use instruction with no uses.");
17551
17552 switch (I->getOpcode()) {
17553 case Instruction::FMul: {
17554 // Don't break FMA, PowerPC prefers FMA.
17555 if (User->getOpcode() != Instruction::FSub &&
17556 User->getOpcode() != Instruction::FAdd)
17557 return true;
17558
17559 const TargetOptions &Options = getTargetMachine().Options;
17560 const Function *F = I->getFunction();
17561 const DataLayout &DL = F->getDataLayout();
17562 Type *Ty = User->getOperand(0)->getType();
17563
17564 return !(
17565 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17566 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17567 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17568 }
17569 case Instruction::Load: {
17570 // Don't break the "store (load float*)" pattern; this pattern will be
17571 // combined to "store (load int32)" by a later InstCombine pass. See function
17572 // combineLoadToOperationType. On PowerPC, loading a floating-point value
17573 // takes more cycles than loading a 32-bit integer.
17574 LoadInst *LI = cast<LoadInst>(I);
17575 // For the loads that combineLoadToOperationType does nothing, like
17576 // ordered load, it should be profitable to hoist them.
17577 // For swifterror load, it can only be used for pointer to pointer type, so
17578 // later type check should get rid of this case.
17579 if (!LI->isUnordered())
17580 return true;
17581
17582 if (User->getOpcode() != Instruction::Store)
17583 return true;
17584
17585 if (I->getType()->getTypeID() != Type::FloatTyID)
17586 return true;
17587
17588 return false;
17589 }
17590 default:
17591 return true;
17592 }
17593 return true;
17594}
17595
17596const MCPhysReg *
17597PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
17598 // LR is a callee-save register, but we must treat it as clobbered by any call
17599 // site. Hence we include LR in the scratch registers, which are in turn added
17600 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17601 // to CTR, which is used by any indirect call.
17602 static const MCPhysReg ScratchRegs[] = {
17603 PPC::X12, PPC::LR8, PPC::CTR8, 0
17604 };
17605
17606 return ScratchRegs;
17607}
17608
17609Register PPCTargetLowering::getExceptionPointerRegister(
17610 const Constant *PersonalityFn) const {
17611 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17612}
17613
17614Register PPCTargetLowering::getExceptionSelectorRegister(
17615 const Constant *PersonalityFn) const {
17616 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17617}
17618
17619bool
17620PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17621 EVT VT , unsigned DefinedValues) const {
17622 if (VT == MVT::v2i64)
17623 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17624
17625 if (Subtarget.hasVSX())
17626 return true;
17627
17627
17628 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17629}
17630
17631Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17632 if (DisableILPPref || Subtarget.enableMachineScheduler())
17633 return TargetLowering::getSchedulingPreference(N);
17634
17635 return Sched::ILP;
17636}
17637
17638// Create a fast isel object.
17639FastISel *
17640PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17641 const TargetLibraryInfo *LibInfo) const {
17642 return PPC::createFastISel(FuncInfo, LibInfo);
17643}
17644
17645// 'Inverted' means the FMA opcode after negating one multiplicand.
17646// For example, (fma -a b c) = (fnmsub a b c)
17647static unsigned invertFMAOpcode(unsigned Opc) {
17648 switch (Opc) {
17649 default:
17650 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17651 case ISD::FMA:
17652 return PPCISD::FNMSUB;
17653 case PPCISD::FNMSUB:
17654 return ISD::FMA;
17655 }
17656}
17657
17658SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17659 bool LegalOps, bool OptForSize,
17660 NegatibleCost &Cost,
17661 unsigned Depth) const {
17662 if (Depth > SelectionDAG::MaxRecursionDepth)
17663 return SDValue();
17664
17665 unsigned Opc = Op.getOpcode();
17666 EVT VT = Op.getValueType();
17667 SDNodeFlags Flags = Op.getNode()->getFlags();
17668
17669 switch (Opc) {
17670 case PPCISD::FNMSUB:
17671 if (!Op.hasOneUse() || !isTypeLegal(VT))
17672 break;
17673
17674 const TargetOptions &Options = getTargetMachine().Options;
17675 SDValue N0 = Op.getOperand(0);
17676 SDValue N1 = Op.getOperand(1);
17677 SDValue N2 = Op.getOperand(2);
17678 SDLoc Loc(Op);
17679
17681 SDValue NegN2 =
17682 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17683
17684 if (!NegN2)
17685 return SDValue();
17686
17687 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17688 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17689 // These transformations may change sign of zeroes. For example,
17690 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17691 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17692 // Try and choose the cheaper one to negate.
17693 NegatibleCost N0Cost = NegatibleCost::Expensive;
17694 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17695 N0Cost, Depth + 1);
17696
17697 NegatibleCost N1Cost = NegatibleCost::Expensive;
17698 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17699 N1Cost, Depth + 1);
17700
17701 if (NegN0 && N0Cost <= N1Cost) {
17702 Cost = std::min(N0Cost, N2Cost);
17703 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17704 } else if (NegN1) {
17705 Cost = std::min(N1Cost, N2Cost);
17706 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17707 }
17708 }
17709
17710 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17711 if (isOperationLegal(ISD::FMA, VT)) {
17712 Cost = N2Cost;
17713 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17714 }
17715
17716 break;
17717 }
17718
17719 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17720 Cost, Depth);
17721}
17722
17723// Override to enable LOAD_STACK_GUARD lowering on Linux.
17725 if (!Subtarget.isTargetLinux())
17727 return true;
17728}
17729
17730// Override to disable global variable loading on Linux and insert AIX canary
17731// word declaration.
17732void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
17733 if (Subtarget.isAIXABI()) {
17734 M.getOrInsertGlobal(AIXSSPCanaryWordName,
17735 PointerType::getUnqual(M.getContext()));
17736 return;
17737 }
17738 if (!Subtarget.isTargetLinux())
17739 return TargetLowering::insertSSPDeclarations(M);
17740}
17741
17742Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
17743 if (Subtarget.isAIXABI())
17744 return M.getGlobalVariable(AIXSSPCanaryWordName);
17745 return TargetLowering::getSDagStackGuard(M);
17746}
17747
17748bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
17749 bool ForCodeSize) const {
17750 if (!VT.isSimple() || !Subtarget.hasVSX())
17751 return false;
17752
17753 switch(VT.getSimpleVT().SimpleTy) {
17754 default:
17755 // For FP types that are currently not supported by PPC backend, return
17756 // false. Examples: f16, f80.
17757 return false;
17758 case MVT::f32:
17759 case MVT::f64: {
17760 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
17761 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
17762 return true;
17763 }
17764 bool IsExact;
17765 APSInt IntResult(16, false);
17766 // The rounding mode doesn't really matter because we only care about floats
17767 // that can be converted to integers exactly.
17768 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17769 // For exact values in the range [-16, 15] we can materialize the float.
17770 if (IsExact && IntResult <= 15 && IntResult >= -16)
17771 return true;
17772 return Imm.isZero();
17773 }
17774 case MVT::ppcf128:
17775 return Imm.isPosZero();
17776 }
17777}
17778
17779// For vector shift operation op, fold
17780// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
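// For example, for v4i32 shifts (shl x, (and y, 31)) becomes (PPCISD::SHL x, y);
// the hardware vector shift instructions already use the shift amount modulo
// the element width, so the mask is redundant.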
17781static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
17782 SelectionDAG &DAG) {
17783 SDValue N0 = N->getOperand(0);
17784 SDValue N1 = N->getOperand(1);
17785 EVT VT = N0.getValueType();
17786 unsigned OpSizeInBits = VT.getScalarSizeInBits();
17787 unsigned Opcode = N->getOpcode();
17788 unsigned TargetOpcode;
17789
17790 switch (Opcode) {
17791 default:
17792 llvm_unreachable("Unexpected shift operation");
17793 case ISD::SHL:
17794 TargetOpcode = PPCISD::SHL;
17795 break;
17796 case ISD::SRL:
17797 TargetOpcode = PPCISD::SRL;
17798 break;
17799 case ISD::SRA:
17800 TargetOpcode = PPCISD::SRA;
17801 break;
17802 }
17803
17804 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17805 N1->getOpcode() == ISD::AND)
17806 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17807 if (Mask->getZExtValue() == OpSizeInBits - 1)
17808 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17809
17810 return SDValue();
17811}
17812
17813SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17814 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17815 return Value;
17816
17817 SDValue N0 = N->getOperand(0);
17818 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17819 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17820 N0.getOpcode() != ISD::SIGN_EXTEND ||
17821 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17822 N->getValueType(0) != MVT::i64)
17823 return SDValue();
17824
17825 // We can't save an operation here if the value is already extended, and
17826 // the existing shift is easier to combine.
17827 SDValue ExtsSrc = N0.getOperand(0);
17828 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17829 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17830 return SDValue();
17831
17832 SDLoc DL(N0);
17833 SDValue ShiftBy = SDValue(CN1, 0);
17834 // We want the shift amount to be i32 on the extswli, but the shift could
17835 // have an i64.
17836 if (ShiftBy.getValueType() == MVT::i64)
17837 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17838
17839 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17840 ShiftBy);
17841}
17842
17843SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17844 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17845 return Value;
17846
17847 return SDValue();
17848}
17849
17850SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17851 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17852 return Value;
17853
17854 return SDValue();
17855}
17856
17857// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17858// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17859// When C is zero, the equation (addi Z, -C) can be simplified to Z
17860// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
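// For example, with C == 0, (add X, (zext (setne Z, 0))) becomes an addze of X
// with the carry from (addic Z, -1), since addic sets CA exactly when Z != 0.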
17861static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17862 const PPCSubtarget &Subtarget) {
17863 if (!Subtarget.isPPC64())
17864 return SDValue();
17865
17866 SDValue LHS = N->getOperand(0);
17867 SDValue RHS = N->getOperand(1);
17868
17869 auto isZextOfCompareWithConstant = [](SDValue Op) {
17870 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17871 Op.getValueType() != MVT::i64)
17872 return false;
17873
17874 SDValue Cmp = Op.getOperand(0);
17875 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17876 Cmp.getOperand(0).getValueType() != MVT::i64)
17877 return false;
17878
17879 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17880 int64_t NegConstant = 0 - Constant->getSExtValue();
17881 // Due to the limitations of the addi instruction,
17882 // -C is required to be [-32768, 32767].
17883 return isInt<16>(NegConstant);
17884 }
17885
17886 return false;
17887 };
17888
17889 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17890 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17891
17892 // If there is a pattern, canonicalize a zext operand to the RHS.
17893 if (LHSHasPattern && !RHSHasPattern)
17894 std::swap(LHS, RHS);
17895 else if (!LHSHasPattern && !RHSHasPattern)
17896 return SDValue();
17897
17898 SDLoc DL(N);
17899 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17900 SDValue Cmp = RHS.getOperand(0);
17901 SDValue Z = Cmp.getOperand(0);
17902 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17903 int64_t NegConstant = 0 - Constant->getSExtValue();
17904
17905 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17906 default: break;
17907 case ISD::SETNE: {
17908 // when C == 0
17909 // --> addze X, (addic Z, -1).carry
17910 // /
17911 // add X, (zext(setne Z, C))--
17912 // \ when -32768 <= -C <= 32767 && C != 0
17913 // --> addze X, (addic (addi Z, -C), -1).carry
17914 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17915 DAG.getConstant(NegConstant, DL, MVT::i64));
17916 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17917 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17918 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17919 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17920 SDValue(Addc.getNode(), 1));
17921 }
17922 case ISD::SETEQ: {
17923 // when C == 0
17924 // --> addze X, (subfic Z, 0).carry
17925 // /
17926 // add X, (zext(sete Z, C))--
17927 // \ when -32768 <= -C <= 32767 && C != 0
17928 // --> addze X, (subfic (addi Z, -C), 0).carry
17929 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17930 DAG.getConstant(NegConstant, DL, MVT::i64));
17931 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17932 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17933 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17934 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17935 SDValue(Subc.getNode(), 1));
17936 }
17937 }
17938
17939 return SDValue();
17940}
17941
17942// Transform
17943// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17944// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17945// In this case both C1 and C2 must be known constants.
17946// C1+C2 must fit into a 34 bit signed integer.
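// For example, (add 8, (MAT_PCREL_ADDR foo+4)) becomes (MAT_PCREL_ADDR foo+12),
// provided the combined offset 12 still fits in a signed 34-bit field.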
17947static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17948 const PPCSubtarget &Subtarget) {
17949 if (!Subtarget.isUsingPCRelativeCalls())
17950 return SDValue();
17951
17952 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17953 // If we find that node try to cast the Global Address and the Constant.
17954 SDValue LHS = N->getOperand(0);
17955 SDValue RHS = N->getOperand(1);
17956
17957 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17958 std::swap(LHS, RHS);
17959
17960 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17961 return SDValue();
17962
17963 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17964 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17965 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17966
17967 // Check that both casts succeeded.
17968 if (!GSDN || !ConstNode)
17969 return SDValue();
17970
17971 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17972 SDLoc DL(GSDN);
17973
17974 // The signed int offset needs to fit in 34 bits.
17975 if (!isInt<34>(NewOffset))
17976 return SDValue();
17977
17978 // The new global address is a copy of the old global address except
17979 // that it has the updated Offset.
17980 SDValue GA =
17981 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17982 NewOffset, GSDN->getTargetFlags());
17983 SDValue MatPCRel =
17984 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17985 return MatPCRel;
17986}
17987
17988SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17989 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17990 return Value;
17991
17992 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17993 return Value;
17994
17995 return SDValue();
17996}
17997
17998// Detect TRUNCATE operations on bitcasts of float128 values.
17999 // What we are looking for here is the situation where we extract a subset
18000// of bits from a 128 bit float.
18001// This can be of two forms:
18002// 1) BITCAST of f128 feeding TRUNCATE
18003// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
18004// The reason this is required is because we do not have a legal i128 type
18005// and so we want to prevent having to store the f128 and then reload part
18006// of it.
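// For example, on little-endian targets a plain (trunc (bitcast f128 X) to i64)
// extracts element 0 of (bitcast X to v2i64), while a preceding srl by 64
// selects element 1 instead.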
18007SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
18008 DAGCombinerInfo &DCI) const {
18009 // If we are using CRBits then try that first.
18010 if (Subtarget.useCRBits()) {
18011 // Check if CRBits did anything and return that if it did.
18012 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
18013 return CRTruncValue;
18014 }
18015
18016 SDLoc dl(N);
18017 SDValue Op0 = N->getOperand(0);
18018
18019 // Looking for a truncate of i128 to i64.
18020 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
18021 return SDValue();
18022
18023 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
18024
18025 // SRL feeding TRUNCATE.
18026 if (Op0.getOpcode() == ISD::SRL) {
18027 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
18028 // The right shift has to be by 64 bits.
18029 if (!ConstNode || ConstNode->getZExtValue() != 64)
18030 return SDValue();
18031
18032 // Switch the element number to extract.
18033 EltToExtract = EltToExtract ? 0 : 1;
18034 // Update Op0 past the SRL.
18035 Op0 = Op0.getOperand(0);
18036 }
18037
18038 // BITCAST feeding a TRUNCATE possibly via SRL.
18039 if (Op0.getOpcode() == ISD::BITCAST &&
18040 Op0.getValueType() == MVT::i128 &&
18041 Op0.getOperand(0).getValueType() == MVT::f128) {
18042 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
18043 return DCI.DAG.getNode(
18044 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
18045 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
18046 }
18047 return SDValue();
18048}
18049
18050SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18051 SelectionDAG &DAG = DCI.DAG;
18052
18053 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
18054 if (!ConstOpOrElement)
18055 return SDValue();
18056
18057 // An imul is usually smaller than the alternative sequence for legal type.
18058 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
18059 isOperationLegal(ISD::MUL, N->getValueType(0)))
18060 return SDValue();
18061
18062 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18063 switch (this->Subtarget.getCPUDirective()) {
18064 default:
18065 // TODO: enhance the condition for subtarget before pwr8
18066 return false;
18067 case PPC::DIR_PWR8:
18068 // type mul add shl
18069 // scalar 4 1 1
18070 // vector 7 2 2
18071 return true;
18072 case PPC::DIR_PWR9:
18073 case PPC::DIR_PWR10:
18074 case PPC::DIR_PWR11:
18075 case PPC::DIR_PWR_FUTURE:
18076 // type mul add shl
18077 // scalar 5 2 2
18078 // vector 7 2 2
18079
18080 // The cycle ratios of the related operations are shown in the table above.
18081 // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
18082 // scalar and vector type. For 2 instrs patterns, add/sub + shl
18083 // are 4, it is always profitable; but for 3 instrs patterns
18084 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
18085 // So we should only do it for vector type.
18086 return IsAddOne && IsNeg ? VT.isVector() : true;
18087 }
18088 };
18089
18090 EVT VT = N->getValueType(0);
18091 SDLoc DL(N);
18092
18093 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
18094 bool IsNeg = MulAmt.isNegative();
18095 APInt MulAmtAbs = MulAmt.abs();
18096
18097 if ((MulAmtAbs - 1).isPowerOf2()) {
18098 // (mul x, 2^N + 1) => (add (shl x, N), x)
18099 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
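// For example, (mul x, 9) becomes (add (shl x, 3), x), and (mul x, -9) becomes
// (sub 0, (add (shl x, 3), x)).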
18100
18101 if (!IsProfitable(IsNeg, true, VT))
18102 return SDValue();
18103
18104 SDValue Op0 = N->getOperand(0);
18105 SDValue Op1 =
18106 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18107 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
18108 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18109
18110 if (!IsNeg)
18111 return Res;
18112
18113 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18114 } else if ((MulAmtAbs + 1).isPowerOf2()) {
18115 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18116 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
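// For example, (mul x, 7) becomes (sub (shl x, 3), x), and (mul x, -7) becomes
// (sub x, (shl x, 3)).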
18117
18118 if (!IsProfitable(IsNeg, false, VT))
18119 return SDValue();
18120
18121 SDValue Op0 = N->getOperand(0);
18122 SDValue Op1 =
18123 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18124 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18125
18126 if (!IsNeg)
18127 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18128 else
18129 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18130
18131 } else {
18132 return SDValue();
18133 }
18134}
18135
18136// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18137// in combiner since we need to check SD flags and other subtarget features.
18138SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18139 DAGCombinerInfo &DCI) const {
18140 SDValue N0 = N->getOperand(0);
18141 SDValue N1 = N->getOperand(1);
18142 SDValue N2 = N->getOperand(2);
18143 SDNodeFlags Flags = N->getFlags();
18144 EVT VT = N->getValueType(0);
18145 SelectionDAG &DAG = DCI.DAG;
18146 const TargetOptions &Options = getTargetMachine().Options;
18147 unsigned Opc = N->getOpcode();
18148 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18149 bool LegalOps = !DCI.isBeforeLegalizeOps();
18150 SDLoc Loc(N);
18151
18152 if (!isOperationLegal(ISD::FMA, VT))
18153 return SDValue();
18154
18155 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18156 // since (fnmsub a b c)=-0 while c-ab=+0.
18157 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18158 return SDValue();
18159
18160 // (fma (fneg a) b c) => (fnmsub a b c)
18161 // (fnmsub (fneg a) b c) => (fma a b c)
18162 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18163 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18164
18165 // (fma a (fneg b) c) => (fnmsub a b c)
18166 // (fnmsub a (fneg b) c) => (fma a b c)
18167 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18168 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18169
18170 return SDValue();
18171}
18172
18173bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18174 // Only duplicate to increase tail calls for the 64-bit SysV ABIs.
18175 if (!Subtarget.is64BitELFABI())
18176 return false;
18177
18178 // If not a tail call then no need to proceed.
18179 if (!CI->isTailCall())
18180 return false;
18181
18182 // If sibling calls have been disabled and tail-calls aren't guaranteed
18183 // there is no reason to duplicate.
18184 auto &TM = getTargetMachine();
18185 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18186 return false;
18187
18188 // Can't tail call a function called indirectly, or if it has variadic args.
18189 const Function *Callee = CI->getCalledFunction();
18190 if (!Callee || Callee->isVarArg())
18191 return false;
18192
18193 // Make sure the callee and caller calling conventions are eligible for tco.
18194 const Function *Caller = CI->getParent()->getParent();
18195 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18196 CI->getCallingConv()))
18197 return false;
18198
18199 // If the function is local then we have a good chance at tail-calling it
18200 return getTargetMachine().shouldAssumeDSOLocal(Callee);
18201}
18202
18203bool PPCTargetLowering::
18204isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18205 const Value *Mask = AndI.getOperand(1);
18206 // If the mask is suitable for andi. or andis. we should sink the and.
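// For example, 0x00ff can be handled by andi. and 0x00ff0000 by andis., but
// 0x00010001 fits neither form, so it is not considered beneficial here.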
18207 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18208 // Can't handle constants wider than 64-bits.
18209 if (CI->getBitWidth() > 64)
18210 return false;
18211 int64_t ConstVal = CI->getZExtValue();
18212 return isUInt<16>(ConstVal) ||
18213 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18214 }
18215
18216 // For non-constant masks, we can always use the record-form and.
18217 return true;
18218}
18219
18220/// getAddrModeForFlags - Based on the set of address flags, select the most
18221/// optimal instruction format to match by.
18222PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18223 // This is not a node we should be handling here.
18224 if (Flags == PPC::MOF_None)
18225 return PPC::AM_None;
18226 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18227 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18228 if ((Flags & FlagSet) == FlagSet)
18229 return PPC::AM_DForm;
18230 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18231 if ((Flags & FlagSet) == FlagSet)
18232 return PPC::AM_DSForm;
18233 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18234 if ((Flags & FlagSet) == FlagSet)
18235 return PPC::AM_DQForm;
18236 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18237 if ((Flags & FlagSet) == FlagSet)
18238 return PPC::AM_PrefixDForm;
18239 // If no other forms are selected, return an X-Form as it is the most
18240 // general addressing mode.
18241 return PPC::AM_XForm;
18242}
18243
18244/// Set alignment flags based on whether or not the Frame Index is aligned.
18245/// Utilized when computing flags for address computation when selecting
18246/// load and store instructions.
18247static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18248 SelectionDAG &DAG) {
18249 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18250 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18251 if (!FI)
18252 return;
18253 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18254 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18255 // If this is (add $FI, $S16Imm), the alignment flags are already set
18256 // based on the immediate. We just need to clear the alignment flags
18257 // if the FI alignment is weaker.
18258 if ((FrameIndexAlign % 4) != 0)
18259 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18260 if ((FrameIndexAlign % 16) != 0)
18261 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18262 // If the address is a plain FrameIndex, set alignment flags based on
18263 // FI alignment.
18264 if (!IsAdd) {
18265 if ((FrameIndexAlign % 4) == 0)
18266 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18267 if ((FrameIndexAlign % 16) == 0)
18268 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18269 }
18270}
18271
18272/// Given a node, compute flags that are used for address computation when
18273/// selecting load and store instructions. The flags computed are stored in
18274 /// FlagSet. This function takes into account whether the node is a constant,
18275 /// an ADD or an OR, or neither, and computes the address flags accordingly.
18276static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18277 SelectionDAG &DAG) {
18278 // Set the alignment flags for the node depending on if the node is
18279 // 4-byte or 16-byte aligned.
18280 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18281 if ((Imm & 0x3) == 0)
18282 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18283 if ((Imm & 0xf) == 0)
18284 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18285 };
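// For example, an immediate of 40 sets only MOF_RPlusSImm16Mult4, while 48
// sets both MOF_RPlusSImm16Mult4 and MOF_RPlusSImm16Mult16.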
18286
18287 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18288 // All 32-bit constants can be computed as LIS + Disp.
18289 const APInt &ConstImm = CN->getAPIntValue();
18290 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18291 FlagSet |= PPC::MOF_AddrIsSImm32;
18292 SetAlignFlagsForImm(ConstImm.getZExtValue());
18293 setAlignFlagsForFI(N, FlagSet, DAG);
18294 }
18295 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18296 FlagSet |= PPC::MOF_RPlusSImm34;
18297 else // Let constant materialization handle large constants.
18298 FlagSet |= PPC::MOF_NotAddNorCst;
18299 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18300 // This address can be represented as an addition of:
18301 // - Register + Imm16 (possibly a multiple of 4/16)
18302 // - Register + Imm34
18303 // - Register + PPCISD::Lo
18304 // - Register + Register
18305 // In any case, we won't have to match this as Base + Zero.
18306 SDValue RHS = N.getOperand(1);
18307 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18308 const APInt &ConstImm = CN->getAPIntValue();
18309 if (ConstImm.isSignedIntN(16)) {
18310 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18311 SetAlignFlagsForImm(ConstImm.getZExtValue());
18312 setAlignFlagsForFI(N, FlagSet, DAG);
18313 }
18314 if (ConstImm.isSignedIntN(34))
18315 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18316 else
18317 FlagSet |= PPC::MOF_RPlusR; // Register.
18318 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18319 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18320 else
18321 FlagSet |= PPC::MOF_RPlusR;
18322 } else { // The address computation is not a constant or an addition.
18323 setAlignFlagsForFI(N, FlagSet, DAG);
18324 FlagSet |= PPC::MOF_NotAddNorCst;
18325 }
18326}
18327
18328static bool isPCRelNode(SDValue N) {
18329 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18330 isValidPCRelNode<ConstantPoolSDNode>(N) ||
18331 isValidPCRelNode<GlobalAddressSDNode>(N) ||
18332 isValidPCRelNode<JumpTableSDNode>(N) ||
18333 isValidPCRelNode<BlockAddressSDNode>(N));
18334}
18335
18336 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18337/// the address flags of the load/store instruction that is to be matched.
18338unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18339 SelectionDAG &DAG) const {
18340 unsigned FlagSet = PPC::MOF_None;
18341
18342 // Compute subtarget flags.
18343 if (!Subtarget.hasP9Vector())
18344 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18345 else
18346 FlagSet |= PPC::MOF_SubtargetP9;
18347
18348 if (Subtarget.hasPrefixInstrs())
18349 FlagSet |= PPC::MOF_SubtargetP10;
18350
18351 if (Subtarget.hasSPE())
18352 FlagSet |= PPC::MOF_SubtargetSPE;
18353
18354 // Check if we have a PCRel node and return early.
18355 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18356 return FlagSet;
18357
18358 // If the node is the paired load/store intrinsics, compute flags for
18359 // address computation and return early.
18360 unsigned ParentOp = Parent->getOpcode();
18361 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18362 (ParentOp == ISD::INTRINSIC_VOID))) {
18363 unsigned ID = Parent->getConstantOperandVal(1);
18364 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18365 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18366 ? Parent->getOperand(2)
18367 : Parent->getOperand(3);
18368 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18369 FlagSet |= PPC::MOF_Vector;
18370 return FlagSet;
18371 }
18372 }
18373
18374 // Mark this as something we don't want to handle here if it is atomic
18375 // or pre-increment instruction.
18376 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18377 if (LSB->isIndexed())
18378 return PPC::MOF_None;
18379
18380 // Compute in-memory type flags. This is based on if there are scalars,
18381 // floats or vectors.
18382 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18383 assert(MN && "Parent should be a MemSDNode!");
18384 EVT MemVT = MN->getMemoryVT();
18385 unsigned Size = MemVT.getSizeInBits();
18386 if (MemVT.isScalarInteger()) {
18387 assert(Size <= 128 &&
18388 "Not expecting scalar integers larger than 16 bytes!");
18389 if (Size < 32)
18390 FlagSet |= PPC::MOF_SubWordInt;
18391 else if (Size == 32)
18392 FlagSet |= PPC::MOF_WordInt;
18393 else
18394 FlagSet |= PPC::MOF_DoubleWordInt;
18395 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18396 if (Size == 128)
18397 FlagSet |= PPC::MOF_Vector;
18398 else if (Size == 256) {
18399 assert(Subtarget.pairedVectorMemops() &&
18400 "256-bit vectors are only available when paired vector memops is "
18401 "enabled!");
18402 FlagSet |= PPC::MOF_Vector;
18403 } else
18404 llvm_unreachable("Not expecting illegal vectors!");
18405 } else { // Floating point type: can be scalar, f128 or vector types.
18406 if (Size == 32 || Size == 64)
18407 FlagSet |= PPC::MOF_ScalarFloat;
18408 else if (MemVT == MVT::f128 || MemVT.isVector())
18409 FlagSet |= PPC::MOF_Vector;
18410 else
18411 llvm_unreachable("Not expecting illegal scalar floats!");
18412 }
18413
18414 // Compute flags for address computation.
18415 computeFlagsForAddressComputation(N, FlagSet, DAG);
18416
18417 // Compute type extension flags.
18418 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18419 switch (LN->getExtensionType()) {
18420 case ISD::SEXTLOAD:
18421 FlagSet |= PPC::MOF_SExt;
18422 break;
18423 case ISD::EXTLOAD:
18424 case ISD::ZEXTLOAD:
18425 FlagSet |= PPC::MOF_ZExt;
18426 break;
18427 case ISD::NON_EXTLOAD:
18428 FlagSet |= PPC::MOF_NoExt;
18429 break;
18430 }
18431 } else
18432 FlagSet |= PPC::MOF_NoExt;
18433
18434 // For integers, no extension is the same as zero extension.
18435 // We set the extension mode to zero extension so we don't have
18436 // to add separate entries in AddrModesMap for loads and stores.
18437 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18438 FlagSet |= PPC::MOF_ZExt;
18439 FlagSet &= ~PPC::MOF_NoExt;
18440 }
18441
18442 // If we don't have prefixed instructions, 34-bit constants should be
18443 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18444 bool IsNonP1034BitConst =
18445 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18446 FlagSet) == PPC::MOF_RPlusSImm34;
18447 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18448 IsNonP1034BitConst)
18449 FlagSet |= PPC::MOF_NotAddNorCst;
18450
18451 return FlagSet;
18452}
18453
18454/// SelectForceXFormMode - Given the specified address, force it to be
18455/// represented as an indexed [r+r] operation (an XForm instruction).
18456PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18457 SDValue &Base,
18458 SelectionDAG &DAG) const {
18459
18460 PPC::AddrMode Mode = PPC::AM_XForm;
18461 int16_t ForceXFormImm = 0;
18462 if (provablyDisjointOr(DAG, N) &&
18463 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18464 Disp = N.getOperand(0);
18465 Base = N.getOperand(1);
18466 return Mode;
18467 }
18468
18469 // If the address is the result of an add, we will utilize the fact that the
18470 // address calculation includes an implicit add. However, we can reduce
18471 // register pressure if we do not materialize a constant just for use as the
18472 // index register. We only get rid of the add if it is not an add of a
18473 // value and a 16-bit signed constant and both have a single use.
18474 if (N.getOpcode() == ISD::ADD &&
18475 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18476 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18477 Disp = N.getOperand(0);
18478 Base = N.getOperand(1);
18479 return Mode;
18480 }
18481
18482 // Otherwise, use R0 as the base register.
18483 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18484 N.getValueType());
18485 Base = N;
18486
18487 return Mode;
18488}
18489
18490bool PPCTargetLowering::splitValueIntoRegisterParts(
18491 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18492 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18493 EVT ValVT = Val.getValueType();
18494 // If we are splitting a scalar integer into f64 parts (i.e. so they
18495 // can be placed into VFRC registers), we need to zero extend and
18496 // bitcast the values. This will ensure the value is placed into a
18497 // VSR using direct moves or stack operations as needed.
18498 if (PartVT == MVT::f64 &&
18499 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18500 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18501 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18502 Parts[0] = Val;
18503 return true;
18504 }
18505 return false;
18506}
18507
18508SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18509 SelectionDAG &DAG) const {
18510 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18511 TargetLowering::CallLoweringInfo CLI(DAG);
18512 EVT RetVT = Op.getValueType();
18513 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18514 SDValue Callee =
18515 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18516 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18517 TargetLowering::ArgListTy Args;
18518 TargetLowering::ArgListEntry Entry;
18519 for (const SDValue &N : Op->op_values()) {
18520 EVT ArgVT = N.getValueType();
18521 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18522 Entry.Node = N;
18523 Entry.Ty = ArgTy;
18524 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18525 Entry.IsZExt = !Entry.IsSExt;
18526 Args.push_back(Entry);
18527 }
18528
18529 SDValue InChain = DAG.getEntryNode();
18530 SDValue TCChain = InChain;
18531 const Function &F = DAG.getMachineFunction().getFunction();
18532 bool isTailCall =
18533 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18534 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18535 if (isTailCall)
18536 InChain = TCChain;
18537 CLI.setDebugLoc(SDLoc(Op))
18538 .setChain(InChain)
18539 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18540 .setTailCall(isTailCall)
18541 .setSExtResult(SignExtend)
18542 .setZExtResult(!SignExtend)
18543 .setIsPostTypeLegalization(true);
18544 return TLI.LowerCallTo(CLI).first;
18545}
18546
18547SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18548 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18549 SelectionDAG &DAG) const {
18550 if (Op.getValueType() == MVT::f32)
18551 return lowerToLibCall(LibCallFloatName, Op, DAG);
18552
18553 if (Op.getValueType() == MVT::f64)
18554 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18555
18556 return SDValue();
18557}
18558
18559bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18560 SDNodeFlags Flags = Op.getNode()->getFlags();
18561 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18562 Flags.hasNoNaNs() && Flags.hasNoInfs();
18563}
18564
18565bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18566 return Op.getNode()->getFlags().hasApproximateFuncs();
18567}
18568
18569bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18570 return getTargetMachine().Options.PPCGenScalarMASSEntries;
18571}
18572
18573SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18574 const char *LibCallFloatName,
18575 const char *LibCallDoubleNameFinite,
18576 const char *LibCallFloatNameFinite,
18577 SDValue Op,
18578 SelectionDAG &DAG) const {
18579 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18580 return SDValue();
18581
18582 if (!isLowringToMASSFiniteSafe(Op))
18583 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18584 DAG);
18585
18586 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18587 LibCallDoubleNameFinite, Op, DAG);
18588}
18589
18590SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18591 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18592 "__xl_powf_finite", Op, DAG);
18593}
18594
18595SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18596 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18597 "__xl_sinf_finite", Op, DAG);
18598}
18599
18600SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18601 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18602 "__xl_cosf_finite", Op, DAG);
18603}
18604
18605SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18606 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18607 "__xl_logf_finite", Op, DAG);
18608}
18609
18610SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18611 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18612 "__xl_log10f_finite", Op, DAG);
18613}
18614
18615SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18616 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18617 "__xl_expf_finite", Op, DAG);
18618}
18619
18620// If we happen to match to an aligned D-Form, check if the Frame Index is
18621// adequately aligned. If it is not, reset the mode to match to X-Form.
18622static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18623 PPC::AddrMode &Mode) {
18624 if (!isa<FrameIndexSDNode>(N))
18625 return;
18626 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18627 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18628 Mode = PPC::AM_XForm;
18629}
18630
18631 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18632/// compute the address flags of the node, get the optimal address mode based
18633/// on the flags, and set the Base and Disp based on the address mode.
18634PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
18635 SDValue N, SDValue &Disp,
18636 SDValue &Base,
18637 SelectionDAG &DAG,
18638 MaybeAlign Align) const {
18639 SDLoc DL(Parent);
18640
18641 // Compute the address flags.
18642 unsigned Flags = computeMOFlags(Parent, N, DAG);
18643
18644 // Get the optimal address mode based on the Flags.
18645 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18646
18647 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18648 // Select an X-Form load if it is not.
18649 setXFormForUnalignedFI(N, Flags, Mode);
18650
18651 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18652 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18653 assert(Subtarget.isUsingPCRelativeCalls() &&
18654 "Must be using PC-Relative calls when a valid PC-Relative node is "
18655 "present!");
18656 Mode = PPC::AM_PCRel;
18657 }
18658
18659 // Set Base and Disp accordingly depending on the address mode.
18660 switch (Mode) {
18661 case PPC::AM_DForm:
18662 case PPC::AM_DSForm:
18663 case PPC::AM_DQForm: {
18664 // This is a register plus a 16-bit immediate. The base will be the
18665 // register and the displacement will be the immediate unless it
18666 // isn't sufficiently aligned.
18667 if (Flags & PPC::MOF_RPlusSImm16) {
18668 SDValue Op0 = N.getOperand(0);
18669 SDValue Op1 = N.getOperand(1);
18670 int16_t Imm = Op1->getAsZExtVal();
18671 if (!Align || isAligned(*Align, Imm)) {
18672 Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18673 Base = Op0;
18674 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18675 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18676 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18677 }
18678 break;
18679 }
18680 }
18681 // This is a register plus the @lo relocation. The base is the register
18682 // and the displacement is the global address.
18683 else if (Flags & PPC::MOF_RPlusLo) {
18684 Disp = N.getOperand(1).getOperand(0); // The global address.
18685 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
18686 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
18687 Disp.getOpcode() == ISD::TargetConstantPool ||
18688 Disp.getOpcode() == ISD::TargetJumpTable);
18689 Base = N.getOperand(0);
18690 break;
18691 }
18692 // This is a constant address at most 32 bits. The base will be
18693 // zero or load-immediate-shifted and the displacement will be
18694 // the low 16 bits of the address.
18695 else if (Flags & PPC::MOF_AddrIsSImm32) {
18696 auto *CN = cast<ConstantSDNode>(N);
18697 EVT CNType = CN->getValueType(0);
18698 uint64_t CNImm = CN->getZExtValue();
18699 // If this address fits entirely in a 16-bit sext immediate field, codegen
18700 // this as "d, 0".
18701 int16_t Imm;
18702 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18703 Disp = DAG.getTargetConstant(Imm, DL, CNType);
18704 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18705 CNType);
18706 break;
18707 }
18708 // Handle 32-bit sext immediate with LIS + Addr mode.
18709 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18710 (!Align || isAligned(*Align, CNImm))) {
18711 int32_t Addr = (int32_t)CNImm;
18712 // Otherwise, break this down into LIS + Disp.
18713 Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18714 Base =
18715 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18716 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18717 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18718 break;
18719 }
18720 }
18721 // Otherwise, the PPC::MOF_NotAdd flag is set. Load/Store is Non-foldable.
18722 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18723 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18724 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18725 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18726 } else
18727 Base = N;
18728 break;
18729 }
18730 case PPC::AM_PrefixDForm: {
18731 int64_t Imm34 = 0;
18732 unsigned Opcode = N.getOpcode();
18733 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18734 (isIntS34Immediate(N.getOperand(1), Imm34))) {
18735 // N is an Add/OR Node, and its operand is a 34-bit signed immediate.
18736 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18737 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18738 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18739 else
18740 Base = N.getOperand(0);
18741 } else if (isIntS34Immediate(N, Imm34)) {
18742 // The address is a 34-bit signed immediate.
18743 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18744 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18745 }
18746 break;
18747 }
18748 case PPC::AM_PCRel: {
18749 // When selecting PC-Relative instructions, "Base" is not utilized as
18750 // we select the address as [PC+imm].
18751 Disp = N;
18752 break;
18753 }
18754 case PPC::AM_None:
18755 break;
18756 default: { // By default, X-Form is always available to be selected.
18757 // When a frame index is not aligned, we also match by XForm.
18758 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18759 Base = FI ? N : N.getOperand(1);
18760 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18761 N.getValueType())
18762 : N.getOperand(0);
18763 break;
18764 }
18765 }
18766 return Mode;
18767}
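One detail in the D-Form group of cases above that is easy to misread is the high-half computation (Addr - (int16_t)Addr) >> 16: because the 16-bit displacement is sign-extended when it is added back, the value materialized by LIS must absorb the borrow whenever the low half is 0x8000 or above. A small worked example (illustrative only, not part of this file):

#include <cassert>
#include <cstdint>

int main() {
  int32_t Addr = 0x12348000;
  int16_t Lo = (int16_t)Addr;                 // -32768: low half, sign-extended
  int32_t Hi = (Addr - (int16_t)Addr) >> 16;  // 0x1235: one above the raw high half
  // LIS materializes Hi << 16; the D-Form displacement then adds Lo back in.
  assert(((Hi << 16) + (int32_t)Lo) == Addr);
}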
18768
18769CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18770 bool Return,
18771 bool IsVarArg) const {
18772 switch (CC) {
18773 case CallingConv::Cold:
18774 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
18775 default:
18776 return CC_PPC64_ELF;
18777 }
18778}
18779
18780bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18781 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18782}
18783
18784TargetLowering::AtomicExpansionKind
18785PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18786 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18787 if (shouldInlineQuadwordAtomics() && Size == 128)
18788 return AtomicExpansionKind::MaskedIntrinsic;
18789
18790 switch (AI->getOperation()) {
18791 case AtomicRMWInst::UIncWrap:
18792 case AtomicRMWInst::UDecWrap:
18793 return AtomicExpansionKind::CmpXChg;
18794 default:
18795 return TargetLowering::shouldExpandAtomicRMWInIR(AI);
18796 }
18797
18798 llvm_unreachable("unreachable atomicrmw operation");
18799}
18800
18801TargetLowering::AtomicExpansionKind
18802PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18803 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18804 if (shouldInlineQuadwordAtomics() && Size == 128)
18805 return AtomicExpansionKind::MaskedIntrinsic;
18806 return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
18807}
18808
18809static Intrinsic::ID
18810getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18811 switch (BinOp) {
18812 default:
18813 llvm_unreachable("Unexpected AtomicRMW BinOp");
18814 case AtomicRMWInst::Xchg:
18815 return Intrinsic::ppc_atomicrmw_xchg_i128;
18816 case AtomicRMWInst::Add:
18817 return Intrinsic::ppc_atomicrmw_add_i128;
18818 case AtomicRMWInst::Sub:
18819 return Intrinsic::ppc_atomicrmw_sub_i128;
18820 case AtomicRMWInst::And:
18821 return Intrinsic::ppc_atomicrmw_and_i128;
18822 case AtomicRMWInst::Or:
18823 return Intrinsic::ppc_atomicrmw_or_i128;
18824 case AtomicRMWInst::Xor:
18825 return Intrinsic::ppc_atomicrmw_xor_i128;
18826 case AtomicRMWInst::Nand:
18827 return Intrinsic::ppc_atomicrmw_nand_i128;
18828 }
18829}
18830
18831Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18832 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18833 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18834 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18835 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18836 Type *ValTy = Incr->getType();
18837 assert(ValTy->getPrimitiveSizeInBits() == 128);
18838 Function *RMW = Intrinsic::getDeclaration(
18839 M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18840 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18841 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18842 Value *IncrHi =
18843 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18844 Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
18845 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18846 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18847 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18848 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18849 return Builder.CreateOr(
18850 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18851}
18852
18853Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18854 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18855 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18856 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18857 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18858 Type *ValTy = CmpVal->getType();
18859 assert(ValTy->getPrimitiveSizeInBits() == 128);
18860 Function *IntCmpXchg =
18861 Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18862 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18863 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18864 Value *CmpHi =
18865 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18866 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18867 Value *NewHi =
18868 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18869 emitLeadingFence(Builder, CI, Ord);
18870 Value *LoHi =
18871 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
18872 emitTrailingFence(Builder, CI, Ord);
18873 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18874 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18875 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18876 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18877 return Builder.CreateOr(
18878 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18879}
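Both emit helpers above marshal an i128 value to and from the quadword intrinsics the same way: truncate to get the low 64 bits, logical-shift-right by 64 and truncate for the high 64 bits, then zero-extend the returned {lo, hi} pair and OR it back together. A minimal sketch of that round trip using the compiler extension type unsigned __int128 (illustrative only, not part of this file):

#include <cassert>
#include <cstdint>

int main() {
  unsigned __int128 Val =
      ((unsigned __int128)0x0123456789abcdefULL << 64) | 0xfedcba9876543210ULL;
  uint64_t Lo = (uint64_t)Val;          // CreateTrunc(Val, i64)
  uint64_t Hi = (uint64_t)(Val >> 64);  // CreateTrunc(CreateLShr(Val, 64), i64)
  unsigned __int128 Rebuilt =
      ((unsigned __int128)Hi << 64) | Lo; // CreateOr(Lo, CreateShl(Hi, 64))
  assert(Rebuilt == Val);
}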
unsigned const MachineRegisterInfo * MRI
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
#define Success
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
@ OP_COPY
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
This defines the Use class.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isConstantOrUndef(const SDValue Op)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
Value * RHS
Value * LHS
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5317
bool isDenormal() const
Definition: APFloat.h:1355
APInt bitcastToAPInt() const
Definition: APFloat.h:1260
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1387
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:429
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1310
APInt abs() const
Get the absolute value.
Definition: APInt.h:1753
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:309
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:415
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:451
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1680
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:420
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:276
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:708
@ Add
*p = old + v
Definition: Instructions.h:712
@ Or
*p = old | v
Definition: Instructions.h:720
@ Sub
*p = old - v
Definition: Instructions.h:714
@ And
*p = old & v
Definition: Instructions.h:716
@ Xor
*p = old ^ v
Definition: Instructions.h:722
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:748
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:752
@ Nand
*p = ~(old & v)
Definition: Instructions.h:718
BinOp getOperation() const
Definition: Instructions.h:787
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:391
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:168
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:209
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:890
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1465
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1971
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1523
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1385
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1391
unsigned arg_size() const
Definition: InstrTypes.h:1408
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:900
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:878
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:865
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:698
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition: Function.cpp:362
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:745
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:757
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:274
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:350
BasicBlockListType::const_iterator const_iterator
Definition: Function.h:70
arg_iterator arg_begin()
Definition: Function.h:831
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
size_t arg_size() const
Definition: Function.h:864
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:212
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:225
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:719
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:582
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
void setThreadLocalMode(ThreadLocalMode Val)
Definition: GlobalValue.h:267
bool hasHiddenVisibility() const
Definition: GlobalValue.h:250
StringRef getSection() const
Definition: Globals.cpp:183
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:124
bool hasComdat() const
Definition: GlobalValue.h:241
Type * getValueType() const
Definition: GlobalValue.h:296
bool hasProtectedVisibility() const
Definition: GlobalValue.h:251
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2521
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1442
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1421
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2026
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2012
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1502
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2417
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:174
bool isUnordered() const
Definition: Instructions.h:247
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:393
Metadata node.
Definition: Metadata.h:1067
Machine Value Type.
SimpleValueType SimpleTy
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
bool isAIXFuncTLSModelOptInitDone() const
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:300
bool is32BitELFABI() const
Definition: PPCSubtarget.h:220
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:260
bool isAIXABI() const
Definition: PPCSubtarget.h:215
bool useSoftFloat() const
Definition: PPCSubtarget.h:175
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:143
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:203
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:254
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:272
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:146
bool isSVR4ABI() const
Definition: PPCSubtarget.h:216
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:135
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:207
bool isLittleEndian() const
Definition: PPCSubtarget.h:182
bool isTargetLinux() const
Definition: PPCSubtarget.h:213
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:278
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:290
bool is64BitELFABI() const
Definition: PPCSubtarget.h:219
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:156
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:296
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:153
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:266
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as PC-relative, i.e. as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:737
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:490
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:494
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:452
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:747
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:843
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:488
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:742
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node that starts a new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:489
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:788
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
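A minimal sketch, assuming it runs inside a DAG-combine-style callback in this file (so the required headers are already included), of how the SelectionDAG factory methods listed here compose; the fold and the helper name are illustrative, not an actual PPC combine.
// Rewrite add(~a, y) as (y - a) - 1 purely to show getNode/getConstant usage;
// profitability and target legality are deliberately ignored here.
static SDValue foldAddOfNot(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue NotA = N->getOperand(0), Y = N->getOperand(1);
  if (NotA.getOpcode() == ISD::XOR && isAllOnesConstant(NotA.getOperand(1))) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Y, NotA.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, Sub, DAG.getConstant(1, DL, VT));
  }
  return SDValue(); // no change
}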
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:691
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:783
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:483
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:814
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:860
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
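A small sketch, with an illustrative helper name, of how these two queries are typically used together during lowering; DAG and Val are assumed to be supplied by the caller.
// Ask whether bits 16..63 of a 64-bit value are already known to be zero, in
// the two equivalent forms exposed above.
static bool fitsInLow16(SelectionDAG &DAG, SDValue Val) {
  KnownBits Known = DAG.computeKnownBits(Val);
  bool ViaKnownBits = Known.countMinLeadingZeros() >= 48;
  bool ViaMask = DAG.MaskedValueIsZero(Val, APInt::getHighBitsSet(64, 48));
  return ViaKnownBits && ViaMask; // both formulations agree
}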
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:501
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:754
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:571
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
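A brief usage sketch of the SmallSet interface listed above (the helper is illustrative): deduplicating while preserving first-seen order, a pattern that appears throughout the backend.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
static llvm::SmallVector<unsigned, 8> uniqueRegs(llvm::ArrayRef<unsigned> Regs) {
  llvm::SmallSet<unsigned, 8> Seen;   // stays inline until it grows past 8
  llvm::SmallVector<unsigned, 8> Out;
  for (unsigned R : Regs)
    if (Seen.insert(R).second)        // .second is true only on first insertion
      Out.push_back(R);
  return Out;
}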
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
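A minimal sketch of the Case/Default chaining pattern documented above; the register names and the mapping are illustrative only.
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
static int parseGPRName(llvm::StringRef Name) {
  return llvm::StringSwitch<int>(Name)
      .Case("r0", 0)
      .Case("r1", 1)
      .Case("r31", 31)
      .Default(-1); // unknown name
}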
Class to represent struct types.
Definition: DerivedTypes.h:216
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
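To show how the configuration hooks in this block are typically called, here is a hedged sketch of a hypothetical target's constructor; the target name, subtarget, register class, and the specific choices are illustrative and are not PPC's actual setup.
// Calling pattern only; MyTargetLowering, MySubtarget, and MyNS::GPRRegClass
// are hypothetical names used for illustration.
MyTargetLowering::MyTargetLowering(const TargetMachine &TM,
                                   const MySubtarget &STI)
    : TargetLowering(TM) {
  // Make i32 a legal type carried in the (hypothetical) GPR register class.
  addRegisterClass(MVT::i32, &MyNS::GPRRegClass);
  // Mark operations the target cannot select directly.
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);   // split into SDIV + SREM
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); // routed to LowerOperation
  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  // Derive register properties once all register classes are added.
  computeRegisterProperties(STI.getRegisterInfo());
}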
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:246
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
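A short sketch of the Type queries listed above; the predicate and its purpose are illustrative.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
static bool isScalarFPOrInt64(llvm::Type *Ty, llvm::LLVMContext &Ctx) {
  Ty = Ty->getScalarType();                 // peel a vector down to its element type
  if (Ty->isFloatTy() || Ty->isDoubleTy())  // 32- or 64-bit IEEE floating point
    return true;
  return Ty == llvm::Type::getInt64Ty(Ctx); // exactly the i64 type
}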
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:811
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1169
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1165
@ TargetConstantPool
Definition: ISDOpcodes.h:174
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1198
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1284
@ STRICT_FCEIL
Definition: ISDOpcodes.h:440
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1074
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:820
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:943
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:933
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1242
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:976
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:464
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ TargetExternalSymbol
Definition: ISDOpcodes.h:175
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1090
@ TargetJumpTable
Definition: ISDOpcodes.h:173
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1264
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1031
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:960
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1120
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1099
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1280
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1194
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:170
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:444
@ GET_ROUNDING
Returns current rounding mode: -1 = Undefined; 0 = Round to 0; 1 = Round to nearest, ties to even; 2 = Round to ...
Definition: ISDOpcodes.h:910
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1021
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:438
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:439
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1291
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1008
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1084
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:828
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:918
@ STRICT_FROUND
Definition: ISDOpcodes.h:442
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:463
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:441
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1140
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:866
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1225
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1251
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:899
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1137
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:437
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1189
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1113
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
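As a worked illustration of what the *_PARTS expansion computes, here is a sketch of a 64-bit left shift expressed over a (low, high) pair of 32-bit words, as a 32-bit target sees it after legalization; the helper is illustrative.
#include <cstdint>
#include <utility>
// Shift the 64-bit value {Hi:Lo} left by Amt, 0 <= Amt < 64.
static std::pair<uint32_t, uint32_t> shlParts(uint32_t Lo, uint32_t Hi,
                                              unsigned Amt) {
  if (Amt == 0)
    return {Lo, Hi};
  if (Amt < 32)
    return {Lo << Amt, (Hi << Amt) | (Lo >> (32 - Amt))};
  return {0, Lo << (Amt - 32)}; // the low word moves entirely into the high word
}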
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1183
@ STRICT_FRINT
Definition: ISDOpcodes.h:436
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1363
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:691
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1248
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:171
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1611
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1527
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1578
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1558
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1617
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1513
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:91
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition: PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition: PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:200
@ MO_TPREL_HA
Definition: PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_TLS
Symbol for VK_PPC_TLS fixup attached to an ADD instruction.
Definition: PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TPREL_LO
Definition: PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition: PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition: PPC.h:160
@ MO_HA
Definition: PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing and f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed block by block, and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT CHAIN, Ptr - a splatting load memory instruction such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT CHAIN, Ptr - a splatting memory load that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
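A minimal C++ sketch (illustrative only, not code from this file; divideByPow2 is a hypothetical name) of the arithmetic the sra[wd]i/addze pair performs, assuming arithmetic right shift of negative signed values:
  int divideByPow2(int X, unsigned K) {
    unsigned LostBits = X & ((1u << K) - 1); // bits the shift would discard
    int Shifted = X >> K;                    // sra[w]i: rounds toward -infinity
    int Carry = (X < 0 && LostBits) ? 1 : 0; // CA: negative dividend with bits shifted out
    return Shifted + Carry;                  // addze: adds the carry, giving truncation toward zero
  }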
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY; G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend. This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
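A minimal sketch (illustrative only; fromHiLo is a hypothetical helper) of how the two halves recombine. On PowerPC, when the low half feeds a sign-extending add such as addi, the high half is pre-adjusted to the @ha form, (Addr + 0x8000) >> 16:
  #include <cstdint>
  uint32_t fromHiLo(uint16_t Hi, uint16_t Lo) {
    return (uint32_t(Hi) << 16) | Lo; // plain @hi/@lo recombination
  }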
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:65
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:105
@ XTY_ER
External reference.
Definition: XCOFF.h:241
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
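A small usage illustration (assumed usage based on the signature above; Align is the value type documented further below):
  #include "llvm/Support/Alignment.h"
  #include <cassert>
  void alignToExample() {
    assert(llvm::alignTo(10, llvm::Align(4)) == 12); // round 10 up to a multiple of 4
    assert(llvm::alignTo(16, llvm::Align(8)) == 16); // already aligned: unchanged
  }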
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:555
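A small usage illustration (assuming the templated form of SignExtend32 declared in MathExtras.h):
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  void signExtendExample() {
    assert(llvm::SignExtend32<16>(0xFFFFu) == -1);    // low 16 bits are all ones
    assert(llvm::SignExtend32<16>(0x7FFFu) == 32767); // positive values pass through
  }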
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:573
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
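A minimal sketch of the non-wrapped check described above (isSingleRunOfOnes is a hypothetical name; the in-tree helper also reports the MB/ME bit positions via its reference parameters):
  #include "llvm/ADT/bit.h"
  #include <cstdint>
  static bool isSingleRunOfOnes(uint32_t Val) {
    if (Val == 0)
      return false;
    uint32_t Shifted = Val >> llvm::countr_zero(Val); // drop trailing zeros
    return (Shifted & (Shifted + 1)) == 0;            // remaining low bits form a solid run
  }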
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:276
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:250
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:279
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:254
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:56
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)