PPCISelLowering.cpp
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSelectionDAGInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
84#include "llvm/Support/Debug.h"
86#include "llvm/Support/Format.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(false));
109
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(true), cl::Hidden);
132
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(true), cl::Hidden);
137
139 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
143 "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
144 cl::desc("Set minimum of largest number of comparisons to use bit test for "
145 "switch on PPC."));
146
148 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
149 cl::desc("max depth when checking alias info in GatherAllAliases()"));
150
152 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
153 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
154 "function to use initial-exec"));
155
156STATISTIC(NumTailCalls, "Number of tail calls");
157STATISTIC(NumSiblingCalls, "Number of sibling calls");
158STATISTIC(ShufflesHandledWithVPERM,
159 "Number of shuffles lowered to a VPERM or XXPERM");
160STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
161
162static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
163
164static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
165
166// A faster local-[exec|dynamic] TLS access sequence (enabled with the
167// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
168// variables; consistent with the IBM XL compiler, we apply a max size of
169// slightly under 32KB.
171
172// FIXME: Remove this once the bug has been fixed!
174
175PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
176 const PPCSubtarget &STI)
177 : TargetLowering(TM, STI), Subtarget(STI) {
178 // Initialize map that relates the PPC addressing modes to the computed flags
179 // of a load/store instruction. The map is used to determine the optimal
180 // addressing mode when selecting load and stores.
181 initializeAddrModeMap();
182 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
183 // arguments are at least 4/8 bytes aligned.
184 bool isPPC64 = Subtarget.isPPC64();
185 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
186 const MVT RegVT = Subtarget.getScalarIntVT();
187
188 // Set up the register classes.
189 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
190 if (!useSoftFloat()) {
191 if (hasSPE()) {
192 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
193 // EFPU2 APU only supports f32
194 if (!Subtarget.hasEFPU2())
195 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
196 } else {
197 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
198 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
199 }
200 }
201
204
205 // PowerPC uses addo_carry,subo_carry to propagate carry.
208
209 // On P10, the default lowering generates better code using the
210 // setbc instruction.
211 if (!Subtarget.hasP10Vector()) {
214 if (isPPC64) {
217 }
218 }
219
220 // Match BITREVERSE to customized fast code sequence in the td file.
223
224 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
226
227 // Custom lower inline assembly to check for special registers.
230
231 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
232 for (MVT VT : MVT::integer_valuetypes()) {
235 }
236
237 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
239
240 if (Subtarget.isISA3_0()) {
241 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
242 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
243 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
244 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
245 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
246 } else {
247 // No extending loads from f16 or HW conversions back and forth.
248 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
250 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
253 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
256 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
257 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
258 }
259
260 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
261
262 // PowerPC has pre-inc loads and stores.
273 if (!Subtarget.hasSPE()) {
278 }
279
280 if (Subtarget.useCRBits()) {
282
283 if (isPPC64 || Subtarget.hasFPCVT()) {
288
290 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
292 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
293
298
300 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
302 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
303 } else {
308 }
309
310 // PowerPC does not support direct load/store of condition registers.
313
314 // FIXME: Remove this once the ANDI glue bug is fixed:
315 if (ANDIGlueBug)
317
318 for (MVT VT : MVT::integer_valuetypes()) {
321 setTruncStoreAction(VT, MVT::i1, Expand);
322 }
323
324 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
325 }
326
327 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
328 // PPC (the libcall is not available).
333
334 // We do not currently implement these libm ops for PowerPC.
335 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
336 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
337 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
338 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
340 setOperationAction(ISD::FREM, MVT::ppcf128, LibCall);
341
342 // PowerPC has no SREM/UREM instructions unless we are on P9
343 // On P9 we may use a hardware instruction to compute the remainder.
344 // When the result of both the remainder and the division is required it is
345 // more efficient to compute the remainder from the result of the division
346 // rather than use the remainder instruction. The instructions are legalized
347 // directly because the DivRemPairsPass performs the transformation at the IR
348 // level.
349 if (Subtarget.isISA3_0()) {
354 } else {
359 }
360
361 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
370
371 // Handle constrained floating-point operations of scalar.
372 // TODO: Handle SPE specific operation.
378
383
384 if (!Subtarget.hasSPE()) {
387 }
388
389 if (Subtarget.hasVSX()) {
392 }
393
394 if (Subtarget.hasFSQRT()) {
397 }
398
399 if (Subtarget.hasFPRND()) {
404
409 }
410
411 // We don't support sin/cos/sqrt/fmod/pow
422
423 // MASS transformation for LLVM intrinsics with replicating fast-math flag
424 // to be consistent to PPCGenScalarMASSEntries pass
425 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
438 }
439
440 if (Subtarget.hasSPE()) {
443 } else {
444 setOperationAction(ISD::FMA , MVT::f64, Legal);
445 setOperationAction(ISD::FMA , MVT::f32, Legal);
448 }
449
450 if (Subtarget.hasSPE())
451 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
452
453 // If we're enabling GP optimizations, use hardware square root
454 if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
456
457 if (!Subtarget.hasFSQRT() &&
458 !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
460
461 if (Subtarget.hasFCPSGN()) {
464 } else {
467 }
468
469 if (Subtarget.hasFPRND()) {
474
479 }
480
481 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
482 // instruction xxbrd to speed up scalar BSWAP64.
483 if (Subtarget.isISA3_1()) {
486 } else {
489 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
490 }
491
492 // CTPOP and CTTZ were introduced in P8 and P9, respectively.
493 if (Subtarget.isISA3_0()) {
494 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
495 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
496 } else {
497 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
498 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
499 }
500
501 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
504 } else {
507 }
508
509 // PowerPC does not have ROTR
512
513 if (!Subtarget.useCRBits()) {
514 // PowerPC does not have Select
519 }
520
521 // PowerPC wants to turn select_cc of FP into fsel when possible.
524
525 // PowerPC wants to optimize integer setcc a bit
526 if (!Subtarget.useCRBits())
528
529 if (Subtarget.hasFPU()) {
533
537 }
538
539 // PowerPC does not have BRCOND which requires SetCC
540 if (!Subtarget.useCRBits())
542
544
545 if (Subtarget.hasSPE()) {
546 // SPE has built-in conversions
553
554 // SPE supports signaling compare of f32/f64.
557 } else {
558 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
561
562 // PowerPC does not have [U|S]INT_TO_FP
567 }
568
569 if (Subtarget.hasDirectMove() && isPPC64) {
574
583 } else {
588 }
589
590 // We cannot sextinreg(i1). Expand to shifts.
592
593 // Custom handling for PowerPC ucmp instruction
595 setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);
596
597 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
598 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
599 // support continuation, user-level threading, etc. As a result, no
600 // other SjLj exception interfaces are implemented; please don't build
601 // your own exception handling based on them.
602 // LLVM/Clang supports zero-cost DWARF exception handling.
605
606 // We want to legalize GlobalAddress and ConstantPool nodes into the
607 // appropriate instructions to materialize the address.
618
619 // TRAP is legal.
620 setOperationAction(ISD::TRAP, MVT::Other, Legal);
621
622 // TRAMPOLINE is custom lowered.
625
626 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
628
629 if (Subtarget.is64BitELFABI()) {
630 // VAARG always uses double-word chunks, so promote anything smaller.
632 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
634 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
636 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
638 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
640 } else if (Subtarget.is32BitELFABI()) {
641 // VAARG is custom lowered with the 32-bit SVR4 ABI.
644 } else
646
647 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
648 if (Subtarget.is32BitELFABI())
650 else
652
653 // Use the default implementation.
654 setOperationAction(ISD::VAEND , MVT::Other, Expand);
663
664 if (Subtarget.isISA3_0() && isPPC64) {
665 setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
666 setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
667 setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
668 setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
669 setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
670 setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
671 setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
672 setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
673 }
674
675 // We want to custom lower some of our intrinsics.
681
682 // To handle counter-based loop conditions.
685
690
691 // Comparisons that require checking two conditions.
692 if (Subtarget.hasSPE()) {
697 }
710
713
714 if (Subtarget.has64BitSupport()) {
715 // They also have instructions for converting between i64 and fp.
724 // This is just the low 32 bits of a (signed) fp->i64 conversion.
725 // We cannot do this with Promote because i64 is not a legal type.
728
729 if (Subtarget.hasLFIWAX() || isPPC64) {
732 }
733 } else {
734 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
735 if (Subtarget.hasSPE()) {
738 } else {
741 }
742 }
743
744 // With the instructions enabled under FPCVT, we can do everything.
745 if (Subtarget.hasFPCVT()) {
746 if (Subtarget.has64BitSupport()) {
755 }
756
765 }
766
767 if (Subtarget.use64BitRegs()) {
768 // 64-bit PowerPC implementations can support i64 types directly
769 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
770 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
772 // 64-bit PowerPC wants to expand i128 shifts itself.
776 } else {
777 // 32-bit PowerPC wants to expand i64 shifts itself.
781 }
782
783 // PowerPC has better expansions for funnel shifts than the generic
784 // TargetLowering::expandFunnelShift.
785 if (Subtarget.has64BitSupport()) {
788 }
791
792 if (Subtarget.hasVSX()) {
803 }
804
805 if (Subtarget.hasAltivec()) {
806 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
813 }
814 // First set operation action for all vector types to expand. Then we
815 // will selectively turn on ones that can be effectively codegen'd.
817 // add/sub are legal for all supported vector VT's.
820
821 // For v2i64, these are only valid with P8Vector. This is corrected after
822 // the loop.
823 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
828 }
829 else {
834 }
835
836 if (Subtarget.hasVSX()) {
842 }
843
844 // Vector instructions introduced in P8
845 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
848 }
849 else {
852 }
853
854 // Vector instructions introduced in P9
855 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
857 else
859
860 // We promote all shuffles to v16i8.
862 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
863
864 // We promote all non-typed operations to v4i32.
866 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
868 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
870 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
872 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
874 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
877 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
879 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
880
881 // No other operations are legal.
920
921 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
922 setTruncStoreAction(VT, InnerVT, Expand);
925 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
926 }
927 }
929 if (!Subtarget.hasP8Vector()) {
930 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
931 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
932 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
933 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
934 }
935
936 // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
937 // with merges, splats, etc.
939
940 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
941 // are cheap, so handle them before they get expanded to scalar.
947
948 setOperationAction(ISD::AND , MVT::v4i32, Legal);
949 setOperationAction(ISD::OR , MVT::v4i32, Legal);
950 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
951 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
953 Subtarget.useCRBits() ? Legal : Expand);
954 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
964 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
967
968 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
969 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
970 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
971 if (Subtarget.hasAltivec())
972 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
974 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
975 if (Subtarget.hasP8Altivec())
976 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
977
978 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
979 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
980 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
981 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
982
983 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
984 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
985
986 if (Subtarget.hasVSX()) {
987 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
988 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
990 }
991
992 if (Subtarget.hasP8Altivec())
993 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
994 else
995 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
996
997 if (Subtarget.isISA3_1()) {
998 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
999 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
1000 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
1001 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
1002 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
1003 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
1004 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
1005 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
1006 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
1007 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
1008 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
1009 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
1010 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
1011 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
1012 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
1013 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
1014 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
1015 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
1016 }
1017
1018 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1019 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1020
1023 // LE is P8+/64-bit so direct moves are supported and these operations
1024 // are legal. The custom transformation requires 64-bit since we need a
1025 // pair of stores that will cover a 128-bit load for P10.
1026 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1030 }
1031
1036
1037 // Altivec does not contain unordered floating-point compare instructions
1038 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1039 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1040 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1041 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1042
1043 if (Subtarget.hasVSX()) {
1046 if (Subtarget.hasP8Vector()) {
1049 }
1050 if (Subtarget.hasDirectMove() && isPPC64) {
1059 }
1061
1062 // The nearbyint variants are not allowed to raise the inexact exception
1063 // so we can only code-gen them with fpexcept.ignore.
1068
1069 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1070 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1071 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1072 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1073 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1076
1077 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1078 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1081
1082 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1083 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1084
1085 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1086 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1087
1088 // Share the Altivec comparison restrictions.
1089 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1090 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1091 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1092 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1093
1094 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1095 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1096
1098
1099 if (Subtarget.hasP8Vector())
1100 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1101
1102 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1103
1104 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1105 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1106 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1107
1108 if (Subtarget.hasP8Altivec()) {
1109 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1110 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1111 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1112
1113 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1114 // SRL, but not for SRA because of the instructions available:
1115 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1116 // doing.
1117 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1118 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1119 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1120
1121 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1122 }
1123 else {
1124 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1125 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1126 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1127
1128 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1129
1130 // VSX v2i64 only supports non-arithmetic operations.
1131 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1132 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1133 }
1134
1135 if (Subtarget.isISA3_1())
1136 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1137 else
1138 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1139
1140 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1141 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1143 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1144
1146
1155
1156 // Custom handling for partial vectors of integers converted to
1157 // floating point. We already have optimal handling for v2i32 through
1158 // the DAG combine, so those aren't necessary.
1175
1176 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1177 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1178 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1179 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1182
1185
1186 // Handle constrained floating-point operations of vector.
1187 // The predicate is `hasVSX` because Altivec instructions raise no
1188 // exceptions but VSX vector instructions do.
1202
1216
1217 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1218 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1219
1220 for (MVT FPT : MVT::fp_valuetypes())
1221 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1222
1223 // Expand the SELECT to SELECT_CC
1225
1226 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1227 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1228
1229 // No implementation for these ops for PowerPC.
1231 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1232 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1233 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1236 }
1237
1238 if (Subtarget.hasP8Altivec()) {
1239 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1240 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1241 }
1242
1243 if (Subtarget.hasP9Vector()) {
1246
1247 // Test data class instructions store results in CR bits.
1248 if (Subtarget.useCRBits()) {
1253 }
1254
1255 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1256 // SRL, but not for SRA because of the instructions available:
1257 // VS{RL} and VS{RL}O.
1258 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1259 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1260 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1261
1262 setOperationAction(ISD::FADD, MVT::f128, Legal);
1263 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1264 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1265 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1267
1268 setOperationAction(ISD::FMA, MVT::f128, Legal);
1275
1277 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1279 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1282
1286
1287 // Handle constrained floating-point operations of fp128
1304 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1305 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1306 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1307 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1308 } else if (Subtarget.hasVSX()) {
1311
1312 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1313 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1314
1315 // Set FADD/FSUB as libcall to keep the legalizer from expanding the
1316 // fp_to_uint and int_to_fp.
1319
1320 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1321 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1322 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1323 setOperationAction(ISD::FABS, MVT::f128, Expand);
1325 setOperationAction(ISD::FMA, MVT::f128, Expand);
1327
1328 // Expand the fp_extend if the target type is fp128.
1331
1332 // Expand the fp_round if the source type is fp128.
1333 for (MVT VT : {MVT::f32, MVT::f64}) {
1336 }
1337
1342
1343 // Lower the following f128 select_cc pattern:
1344 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1346
1347 // We need to handle f128 SELECT_CC with integer result type.
1349 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1350 }
1351
1352 if (Subtarget.hasP9Altivec()) {
1353 if (Subtarget.isISA3_1()) {
1358 } else {
1361 }
1369
1370 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1371 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1372 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1373 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1374 }
1375
1376 if (Subtarget.hasP10Vector()) {
1378 }
1379 }
1380
1381 if (Subtarget.pairedVectorMemops()) {
1382 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1383 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1384 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1385 }
1386 if (Subtarget.hasMMA()) {
1387 if (Subtarget.isISAFuture()) {
1388 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1389 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1390 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1391 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1392 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1393 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1394 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1395 } else {
1396 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1397 }
1398 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1399 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1401 }
1402
1403 if (Subtarget.has64BitSupport())
1405
1406 if (Subtarget.isISA3_1())
1407 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1408
1409 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1410
1411 if (!isPPC64) {
1414 }
1415
1420 }
1421
1423
1424 if (Subtarget.hasAltivec()) {
1425 // Altivec instructions set fields to all zeros or all ones.
1427 }
1428
1431 else if (isPPC64)
1433 else
1435
1436 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1437
1438 // We have target-specific dag combine patterns for the following nodes:
1441 if (Subtarget.hasFPCVT())
1444 if (Subtarget.useCRBits())
1448
1450
1452
1453 if (Subtarget.useCRBits()) {
1455 }
1456
1457 // With 32 condition bits, we don't need to sink (and duplicate) compares
1458 // aggressively in CodeGenPrep.
1459 if (Subtarget.useCRBits()) {
1461 }
1462
1463 // TODO: The default entry number is set to 64. This stops most jump table
1464 // generation on PPC. But it is good for current PPC HWs because the indirect
1465 // branch instruction mtctr to the jump table may lead to bad branch prediction.
1466 // Re-evaluate this value on future HWs that can do better with mtctr.
1468
1469 // The default minimum of largest number in a BitTest cluster is 3.
1471
1473 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1474
1475 auto CPUDirective = Subtarget.getCPUDirective();
1476 switch (CPUDirective) {
1477 default: break;
1478 case PPC::DIR_970:
1479 case PPC::DIR_A2:
1480 case PPC::DIR_E500:
1481 case PPC::DIR_E500mc:
1482 case PPC::DIR_E5500:
1483 case PPC::DIR_PWR4:
1484 case PPC::DIR_PWR5:
1485 case PPC::DIR_PWR5X:
1486 case PPC::DIR_PWR6:
1487 case PPC::DIR_PWR6X:
1488 case PPC::DIR_PWR7:
1489 case PPC::DIR_PWR8:
1490 case PPC::DIR_PWR9:
1491 case PPC::DIR_PWR10:
1492 case PPC::DIR_PWR11:
1496 break;
1497 }
1498
1499 if (Subtarget.enableMachineScheduler())
1501 else
1503
1505
1506 // The Freescale cores do better with aggressive inlining of memcpy and
1507 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1508 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1509 MaxStoresPerMemset = 32;
1511 MaxStoresPerMemcpy = 32;
1515 } else if (CPUDirective == PPC::DIR_A2) {
1516 // The A2 also benefits from (very) aggressive inlining of memcpy and
1517 // friends. The overhead of the function call, even when warm, can be
1518 // over one hundred cycles.
1519 MaxStoresPerMemset = 128;
1520 MaxStoresPerMemcpy = 128;
1521 MaxStoresPerMemmove = 128;
1522 MaxLoadsPerMemcmp = 128;
1523 } else {
1526 }
1527
1528 // Enable generation of STXVP instructions by default for mcpu=future.
1529 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1530 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1531 DisableAutoPairedVecSt = false;
1532
1533 IsStrictFPEnabled = true;
1534
1535 // Let the subtarget (CPU) decide if a predictable select is more expensive
1536 // than the corresponding branch. This information is used in CGP to decide
1537 // when to convert selects into branches.
1538 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1539
1541}
1542
1543// *********************************** NOTE ************************************
1544// For selecting load and store instructions, the addressing modes are defined
1545// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1546 // patterns to match the load and store instructions.
1547//
1548// The TD definitions for the addressing modes correspond to their respective
1549// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1550// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1551// address mode flags of a particular node. Afterwards, the computed address
1552// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1553// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1554// accordingly, based on the preferred addressing mode.
1555//
1556// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1557// MemOpFlags contains all the possible flags that can be used to compute the
1558// optimal addressing mode for load and store instructions.
1559// AddrMode contains all the possible load and store addressing modes available
1560// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1561//
1562// When adding new load and store instructions, it is possible that new address
1563// flags may need to be added into MemOpFlags, and a new addressing mode will
1564// need to be added to AddrMode. An entry of the new addressing mode (consisting
1565// of the minimal and main distinguishing address flags for the new load/store
1566// instructions) will need to be added into initializeAddrModeMap() below.
1567 // Finally, when adding new addressing modes, getAddrModeForFlags() will
1568// need to be updated to account for selecting the optimal addressing mode.
1569// *****************************************************************************
1570/// Initialize the map that relates the different addressing modes of the load
1571/// and store instructions to a set of flags. This ensures the load/store
1572/// instruction is correctly matched during instruction selection.
1573void PPCTargetLowering::initializeAddrModeMap() {
1574 AddrModesMap[PPC::AM_DForm] = {
1575 // LWZ, STW
1580 // LBZ, LHZ, STB, STH
1585 // LHA
1590 // LFS, LFD, STFS, STFD
1595 };
1596 AddrModesMap[PPC::AM_DSForm] = {
1597 // LWA
1601 // LD, STD
1605 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1609 };
1610 AddrModesMap[PPC::AM_DQForm] = {
1611 // LXV, STXV
1615 };
1616 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1618 // TODO: Add mapping for quadword load/store.
1619}
1620
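// Illustrative, self-contained sketch only (simplified, made-up types and
// names; not the LLVM implementation of getAddrModeForFlags(), which appears
// later in this file): the lookup described in the NOTE above can be pictured
// as "return the first addressing mode whose stored flag combination is fully
// contained in the flags computed for the memory access node".
namespace addrmode_sketch {
enum SketchAddrMode { SK_DForm, SK_DSForm, SK_DQForm, SK_XForm };

struct SketchModeEntry {
  SketchAddrMode Mode;
  uint64_t RequiredFlags; // one acceptable flag combination for this mode
};

inline SketchAddrMode selectAddrMode(const SketchModeEntry *Entries,
                                     unsigned NumEntries,
                                     uint64_t ComputedFlags) {
  for (unsigned I = 0; I != NumEntries; ++I)
    if ((ComputedFlags & Entries[I].RequiredFlags) == Entries[I].RequiredFlags)
      return Entries[I].Mode; // all flags required by this entry are present
  return SK_XForm; // fall back to the register+register (X-Form) mode
}
} // namespace addrmode_sketch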
1621/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1622/// the desired ByVal argument alignment.
1623static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1624 if (MaxAlign == MaxMaxAlign)
1625 return;
1626 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1627 if (MaxMaxAlign >= 32 &&
1628 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1629 MaxAlign = Align(32);
1630 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1631 MaxAlign < 16)
1632 MaxAlign = Align(16);
1633 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1634 Align EltAlign;
1635 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1636 if (EltAlign > MaxAlign)
1637 MaxAlign = EltAlign;
1638 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1639 for (auto *EltTy : STy->elements()) {
1640 Align EltAlign;
1641 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1642 if (EltAlign > MaxAlign)
1643 MaxAlign = EltAlign;
1644 if (MaxAlign == MaxMaxAlign)
1645 break;
1646 }
1647 }
1648}
1649
1650/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1651/// function arguments in the caller parameter area.
1652Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1653 const DataLayout &DL) const {
1654 // 16byte and wider vectors are passed on 16byte boundary.
1655 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1656 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1657 if (Subtarget.hasAltivec())
1658 getMaxByValAlign(Ty, Alignment, Align(16));
1659 return Alignment;
1660}
1661
1663 return Subtarget.useSoftFloat();
1664}
1665
1667 return Subtarget.hasSPE();
1668}
1669
1671 return VT.isScalarInteger();
1672}
1673
1675 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1676 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1677 return false;
1678
1679 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1680 if (VTy->getScalarType()->isIntegerTy()) {
1681 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1682 if (ElemSizeInBits == 32) {
1683 Index = Subtarget.isLittleEndian() ? 2 : 1;
1684 return true;
1685 }
1686 if (ElemSizeInBits == 64) {
1687 Index = Subtarget.isLittleEndian() ? 1 : 0;
1688 return true;
1689 }
1690 }
1691 }
1692 return false;
1693}
1694
1696 EVT VT) const {
1697 if (!VT.isVector())
1698 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1699
1701}
1702
1704 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1705 return true;
1706}
1707
1708//===----------------------------------------------------------------------===//
1709// Node matching predicates, for use by the tblgen matching code.
1710//===----------------------------------------------------------------------===//
1711
1712/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1715 return CFP->getValueAPF().isZero();
1716 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1717 // Maybe this has already been legalized into the constant pool?
1718 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1719 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1720 return CFP->getValueAPF().isZero();
1721 }
1722 return false;
1723}
1724
1725/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1726/// true if Op is undef or if it matches the specified value.
1727static bool isConstantOrUndef(int Op, int Val) {
1728 return Op < 0 || Op == Val;
1729}
1730
1731/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1732/// VPKUHUM instruction.
1733/// The ShuffleKind distinguishes between big-endian operations with
1734/// two different inputs (0), either-endian operations with two identical
1735/// inputs (1), and little-endian operations with two different inputs (2).
1736/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1737bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1738 SelectionDAG &DAG) {
1739 bool IsLE = DAG.getDataLayout().isLittleEndian();
1740 if (ShuffleKind == 0) {
1741 if (IsLE)
1742 return false;
1743 for (unsigned i = 0; i != 16; ++i)
1744 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1745 return false;
1746 } else if (ShuffleKind == 2) {
1747 if (!IsLE)
1748 return false;
1749 for (unsigned i = 0; i != 16; ++i)
1750 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1751 return false;
1752 } else if (ShuffleKind == 1) {
1753 unsigned j = IsLE ? 0 : 1;
1754 for (unsigned i = 0; i != 8; ++i)
1755 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1756 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1757 return false;
1758 }
1759 return true;
1760}
1761
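// Example masks accepted by isVPKUHUMShuffleMask(), derived from the checks
// above; indices refer to the 32 bytes of the two concatenated v16i8 inputs,
// and the array names are illustrative only.
constexpr int VPKUHUMMaskBE[16] = // ShuffleKind == 0: odd bytes 1,3,...,31
    {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
constexpr int VPKUHUMMaskLESwapped[16] = // ShuffleKind == 2: even bytes 0,...,30
    {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30};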
1762/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1763/// VPKUWUM instruction.
1764/// The ShuffleKind distinguishes between big-endian operations with
1765/// two different inputs (0), either-endian operations with two identical
1766/// inputs (1), and little-endian operations with two different inputs (2).
1767/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1768bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1769 SelectionDAG &DAG) {
1770 bool IsLE = DAG.getDataLayout().isLittleEndian();
1771 if (ShuffleKind == 0) {
1772 if (IsLE)
1773 return false;
1774 for (unsigned i = 0; i != 16; i += 2)
1775 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1776 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1777 return false;
1778 } else if (ShuffleKind == 2) {
1779 if (!IsLE)
1780 return false;
1781 for (unsigned i = 0; i != 16; i += 2)
1782 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1783 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1784 return false;
1785 } else if (ShuffleKind == 1) {
1786 unsigned j = IsLE ? 0 : 2;
1787 for (unsigned i = 0; i != 8; i += 2)
1788 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1789 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1790 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1791 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1792 return false;
1793 }
1794 return true;
1795}
1796
1797/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1798/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1799/// current subtarget.
1800///
1801/// The ShuffleKind distinguishes between big-endian operations with
1802/// two different inputs (0), either-endian operations with two identical
1803/// inputs (1), and little-endian operations with two different inputs (2).
1804/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1805bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1806 SelectionDAG &DAG) {
1807 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1808 if (!Subtarget.hasP8Vector())
1809 return false;
1810
1811 bool IsLE = DAG.getDataLayout().isLittleEndian();
1812 if (ShuffleKind == 0) {
1813 if (IsLE)
1814 return false;
1815 for (unsigned i = 0; i != 16; i += 4)
1816 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1817 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1818 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1819 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1820 return false;
1821 } else if (ShuffleKind == 2) {
1822 if (!IsLE)
1823 return false;
1824 for (unsigned i = 0; i != 16; i += 4)
1825 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1826 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1827 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1828 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1829 return false;
1830 } else if (ShuffleKind == 1) {
1831 unsigned j = IsLE ? 0 : 4;
1832 for (unsigned i = 0; i != 8; i += 4)
1833 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1834 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1835 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1836 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1837 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1838 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1839 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1840 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1841 return false;
1842 }
1843 return true;
1844}
1845
1846/// isVMerge - Common function, used to match vmrg* shuffles.
1847///
1848static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1849 unsigned LHSStart, unsigned RHSStart) {
1850 if (N->getValueType(0) != MVT::v16i8)
1851 return false;
1852 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1853 "Unsupported merge size!");
1854
1855 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1856 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1857 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1858 LHSStart+j+i*UnitSize) ||
1859 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1860 RHSStart+j+i*UnitSize))
1861 return false;
1862 }
1863 return true;
1864}
1865
1866/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1867/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1868/// The ShuffleKind distinguishes between big-endian merges with two
1869/// different inputs (0), either-endian merges with two identical inputs (1),
1870/// and little-endian merges with two different inputs (2). For the latter,
1871/// the input operands are swapped (see PPCInstrAltivec.td).
1872bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1873 unsigned ShuffleKind, SelectionDAG &DAG) {
1874 if (DAG.getDataLayout().isLittleEndian()) {
1875 if (ShuffleKind == 1) // unary
1876 return isVMerge(N, UnitSize, 0, 0);
1877 else if (ShuffleKind == 2) // swapped
1878 return isVMerge(N, UnitSize, 0, 16);
1879 else
1880 return false;
1881 } else {
1882 if (ShuffleKind == 1) // unary
1883 return isVMerge(N, UnitSize, 8, 8);
1884 else if (ShuffleKind == 0) // normal
1885 return isVMerge(N, UnitSize, 8, 24);
1886 else
1887 return false;
1888 }
1889}
1890
1891/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1892/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1893/// The ShuffleKind distinguishes between big-endian merges with two
1894/// different inputs (0), either-endian merges with two identical inputs (1),
1895/// and little-endian merges with two different inputs (2). For the latter,
1896/// the input operands are swapped (see PPCInstrAltivec.td).
1897bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1898 unsigned ShuffleKind, SelectionDAG &DAG) {
1899 if (DAG.getDataLayout().isLittleEndian()) {
1900 if (ShuffleKind == 1) // unary
1901 return isVMerge(N, UnitSize, 8, 8);
1902 else if (ShuffleKind == 2) // swapped
1903 return isVMerge(N, UnitSize, 8, 24);
1904 else
1905 return false;
1906 } else {
1907 if (ShuffleKind == 1) // unary
1908 return isVMerge(N, UnitSize, 0, 0);
1909 else if (ShuffleKind == 0) // normal
1910 return isVMerge(N, UnitSize, 0, 16);
1911 else
1912 return false;
1913 }
1914}
1915
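// Example byte masks (UnitSize == 1, big-endian, two different inputs, i.e.
// ShuffleKind == 0) accepted by the two predicates above, derived from the
// isVMerge() calls; array names are illustrative only.
constexpr int VMRGLBMaskBE[16] = // low-half merge: isVMerge(N, 1, 8, 24)
    {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31};
constexpr int VMRGHBMaskBE[16] = // high-half merge: isVMerge(N, 1, 0, 16)
    {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};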
1916/**
1917 * Common function used to match vmrgew and vmrgow shuffles
1918 *
1919 * The indexOffset determines whether to look for even or odd words in
1920 * the shuffle mask. This is based on the endianness of the target
1921 * machine.
1922 * - Little Endian:
1923 * - Use offset of 0 to check for odd elements
1924 * - Use offset of 4 to check for even elements
1925 * - Big Endian:
1926 * - Use offset of 0 to check for even elements
1927 * - Use offset of 4 to check for odd elements
1928 * A detailed description of the vector element ordering for little endian and
1929 * big endian can be found at
1930 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1931 * Targeting your applications - what little endian and big endian IBM XL C/C++
1932 * compiler differences mean to you
1933 *
1934 * The mask to the shuffle vector instruction specifies the indices of the
1935 * elements from the two input vectors to place in the result. The elements are
1936 * numbered in array-access order, starting with the first vector. These vectors
1937 * are always of type v16i8, thus each vector contains 16 elements of 8 bits
1938 * each. More info on the shuffle vector can be found in the
1939 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1940 * Language Reference.
1941 *
1942 * The RHSStartValue indicates whether the same input vectors are used (unary)
1943 * or two different input vectors are used, based on the following:
1944 * - If the instruction uses the same vector for both inputs, the range of the
1945 * indices will be 0 to 15. In this case, the RHSStart value passed should
1946 * be 0.
1947 * - If the instruction has two different vectors then the range of the
1948 * indices will be 0 to 31. In this case, the RHSStart value passed should
1949 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1950 * to 31 specify elements in the second vector).
1951 *
1952 * \param[in] N The shuffle vector SD Node to analyze
1953 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1954 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1955 * vector to the shuffle_vector instruction
1956 * \return true iff this shuffle vector represents an even or odd word merge
1957 */
1958static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1959 unsigned RHSStartValue) {
1960 if (N->getValueType(0) != MVT::v16i8)
1961 return false;
1962
1963 for (unsigned i = 0; i < 2; ++i)
1964 for (unsigned j = 0; j < 4; ++j)
1965 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1966 i*RHSStartValue+j+IndexOffset) ||
1967 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1968 i*RHSStartValue+j+IndexOffset+8))
1969 return false;
1970 return true;
1971}
1972
1973/**
1974 * Determine if the specified shuffle mask is suitable for the vmrgew or
1975 * vmrgow instructions.
1976 *
1977 * \param[in] N The shuffle vector SD Node to analyze
1978 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1979 * \param[in] ShuffleKind Identify the type of merge:
1980 * - 0 = big-endian merge with two different inputs;
1981 * - 1 = either-endian merge with two identical inputs;
1982 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1983 * little-endian merges).
1984 * \param[in] DAG The current SelectionDAG
1985 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow merge
1986 */
1987bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1988 unsigned ShuffleKind, SelectionDAG &DAG) {
1989 if (DAG.getDataLayout().isLittleEndian()) {
1990 unsigned indexOffset = CheckEven ? 4 : 0;
1991 if (ShuffleKind == 1) // Unary
1992 return isVMerge(N, indexOffset, 0);
1993 else if (ShuffleKind == 2) // swapped
1994 return isVMerge(N, indexOffset, 16);
1995 else
1996 return false;
1997 }
1998 else {
1999 unsigned indexOffset = CheckEven ? 0 : 4;
2000 if (ShuffleKind == 1) // Unary
2001 return isVMerge(N, indexOffset, 0);
2002 else if (ShuffleKind == 0) // Normal
2003 return isVMerge(N, indexOffset, 16);
2004 else
2005 return false;
2006 }
2007 return false;
2008}
2009
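// Example big-endian masks with two different inputs (ShuffleKind == 0)
// accepted by isVMRGEOShuffleMask(), derived from the word-merge checks above.
// Writing the inputs as words A0..A3 and B0..B3, the even merge produces
// { A0, B0, A2, B2 } and the odd merge { A1, B1, A3, B3 }. Array names are
// illustrative only.
constexpr int VMRGEWMaskBE[16] = // CheckEven == true
    {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27};
constexpr int VMRGOWMaskBE[16] = // CheckEven == false
    {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31};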
2010/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2011/// amount, otherwise return -1.
2012/// The ShuffleKind distinguishes between big-endian operations with two
2013/// different inputs (0), either-endian operations with two identical inputs
2014/// (1), and little-endian operations with two different inputs (2). For the
2015/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2016int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2017 SelectionDAG &DAG) {
2018 if (N->getValueType(0) != MVT::v16i8)
2019 return -1;
2020
2022
2023 // Find the first non-undef value in the shuffle mask.
2024 unsigned i;
2025 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2026 /*search*/;
2027
2028 if (i == 16) return -1; // all undef.
2029
2030 // Otherwise, check to see if the rest of the elements are consecutively
2031 // numbered from this value.
2032 unsigned ShiftAmt = SVOp->getMaskElt(i);
2033 if (ShiftAmt < i) return -1;
2034
2035 ShiftAmt -= i;
2036 bool isLE = DAG.getDataLayout().isLittleEndian();
2037
2038 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2039 // Check the rest of the elements to see if they are consecutive.
2040 for (++i; i != 16; ++i)
2041 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2042 return -1;
2043 } else if (ShuffleKind == 1) {
2044 // Check the rest of the elements to see if they are consecutive.
2045 for (++i; i != 16; ++i)
2046 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2047 return -1;
2048 } else
2049 return -1;
2050
2051 if (isLE)
2052 ShiftAmt = 16 - ShiftAmt;
2053
2054 return ShiftAmt;
2055}
2056
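// Worked example for isVSLDOIShuffleMask(): on a big-endian target with two
// different inputs (ShuffleKind == 0), a mask of 16 consecutive byte indices
// starting at 4 selects bytes 4..19 of the concatenated inputs, so the
// function returns a shift amount of 4. The array name is illustrative only.
constexpr int VSLDOIShift4MaskBE[16] =
    {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};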
2057/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2058/// specifies a splat of a single element that is suitable for input to
2059/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2060bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2061 EVT VT = N->getValueType(0);
2062 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2063 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2064
2065 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2066 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2067
2068 // The consecutive indices need to specify an element, not part of two
2069 // different elements. So abandon ship early if this isn't the case.
2070 if (N->getMaskElt(0) % EltSize != 0)
2071 return false;
2072
2073 // This is a splat operation if each element of the permute is the same, and
2074 // if the value doesn't reference the second vector.
2075 unsigned ElementBase = N->getMaskElt(0);
2076
2077 // FIXME: Handle UNDEF elements too!
2078 if (ElementBase >= 16)
2079 return false;
2080
2081 // Check that the indices are consecutive, in the case of a multi-byte element
2082 // splatted with a v16i8 mask.
2083 for (unsigned i = 1; i != EltSize; ++i)
2084 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2085 return false;
2086
2087 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2088 // An UNDEF element is a sequence of UNDEF bytes.
2089 if (N->getMaskElt(i) < 0) {
2090 for (unsigned j = 1; j != EltSize; ++j)
2091 if (N->getMaskElt(i + j) >= 0)
2092 return false;
2093 } else
2094 for (unsigned j = 0; j != EltSize; ++j)
2095 if (N->getMaskElt(i + j) != N->getMaskElt(j))
2096 return false;
2097 }
2098 return true;
2099}
2100
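// Worked example for isSplatShuffleMask() with EltSize == 4: splatting word
// element 1 of a v4i32 is expressed as a v16i8 mask in which every 4-byte
// group repeats bytes 4..7 of the first input. The array name is illustrative
// only.
constexpr int SplatWord1Mask[16] =
    {4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7};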
2101/// Check that the mask is shuffling N byte elements. Within each N byte
2102/// element of the mask, the indices could be either in increasing or
2103/// decreasing order as long as they are consecutive.
2104/// \param[in] N the shuffle vector SD Node to analyze
2105/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2106/// Word/DoubleWord/QuadWord).
2107 /// \param[in] StepLen the index delta between adjacent bytes within each N
2108 /// byte element: 1 if the mask is in increasing order, -1 if decreasing.
2109/// \return true iff the mask is shuffling N byte elements.
2110static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2111 int StepLen) {
2112 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2113 "Unexpected element width.");
2114 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2115
2116 unsigned NumOfElem = 16 / Width;
2117 unsigned MaskVal[16]; // Width is never greater than 16
2118 for (unsigned i = 0; i < NumOfElem; ++i) {
2119 MaskVal[0] = N->getMaskElt(i * Width);
2120 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2121 return false;
2122 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2123 return false;
2124 }
2125
2126 for (unsigned int j = 1; j < Width; ++j) {
2127 MaskVal[j] = N->getMaskElt(i * Width + j);
2128 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2129 return false;
2130 }
2131 }
2132 }
2133
2134 return true;
2135}
2136
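// Examples for isNByteElemShuffleMask() with Width == 4, derived from the
// checks above: with StepLen == 1 the bytes inside each word-sized element
// increase consecutively from a multiple of 4; with StepLen == -1 they
// decrease consecutively. Array names are illustrative only.
constexpr int WordElemsIncreasing[16] =
    {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
constexpr int WordElemsDecreasing[16] =
    {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12};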
2137bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2138 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2139 if (!isNByteElemShuffleMask(N, 4, 1))
2140 return false;
2141
2142 // Now we look at mask elements 0,4,8,12
2143 unsigned M0 = N->getMaskElt(0) / 4;
2144 unsigned M1 = N->getMaskElt(4) / 4;
2145 unsigned M2 = N->getMaskElt(8) / 4;
2146 unsigned M3 = N->getMaskElt(12) / 4;
2147 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2148 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2149
2150 // Below, let H and L be arbitrary elements of the shuffle mask
2151 // where H is in the range [4,7] and L is in the range [0,3].
2152 // H, 1, 2, 3 or L, 5, 6, 7
2153 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2154 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2155 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2156 InsertAtByte = IsLE ? 12 : 0;
2157 Swap = M0 < 4;
2158 return true;
2159 }
2160 // 0, H, 2, 3 or 4, L, 6, 7
2161 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2162 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2163 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2164 InsertAtByte = IsLE ? 8 : 4;
2165 Swap = M1 < 4;
2166 return true;
2167 }
2168 // 0, 1, H, 3 or 4, 5, L, 7
2169 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2170 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2171 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2172 InsertAtByte = IsLE ? 4 : 8;
2173 Swap = M2 < 4;
2174 return true;
2175 }
2176 // 0, 1, 2, H or 4, 5, 6, L
2177 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2178 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2179 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2180 InsertAtByte = IsLE ? 0 : 12;
2181 Swap = M3 < 4;
2182 return true;
2183 }
2184
2185 // If both vector operands for the shuffle are the same vector, the mask will
2186 // contain only elements from the first one and the second one will be undef.
2187 if (N->getOperand(1).isUndef()) {
2188 ShiftElts = 0;
2189 Swap = true;
2190 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2191 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2192 InsertAtByte = IsLE ? 12 : 0;
2193 return true;
2194 }
2195 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2196 InsertAtByte = IsLE ? 8 : 4;
2197 return true;
2198 }
2199 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2200 InsertAtByte = IsLE ? 4 : 8;
2201 return true;
2202 }
2203 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2204 InsertAtByte = IsLE ? 0 : 12;
2205 return true;
2206 }
2207 }
2208
2209 return false;
2210}
2211
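// Worked example for isXXINSERTWMask() on a little-endian target: the byte
// mask below is word mask {5, 1, 2, 3}, i.e. word 1 of the second input placed
// in mask slot 0 with the remaining words taken from the first input. Per the
// cases above it is accepted with ShiftElts = 1, InsertAtByte = 12 and
// Swap = false. The array name is illustrative only.
constexpr int InsertWordMaskLE[16] =
    {20, 21, 22, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};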
2212bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2213 bool &Swap, bool IsLE) {
2214 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2215 // Ensure each byte index of the word is consecutive.
2216 if (!isNByteElemShuffleMask(N, 4, 1))
2217 return false;
2218
2219 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2220 unsigned M0 = N->getMaskElt(0) / 4;
2221 unsigned M1 = N->getMaskElt(4) / 4;
2222 unsigned M2 = N->getMaskElt(8) / 4;
2223 unsigned M3 = N->getMaskElt(12) / 4;
2224
2225 // If both vector operands for the shuffle are the same vector, the mask will
2226 // contain only elements from the first one and the second one will be undef.
2227 if (N->getOperand(1).isUndef()) {
2228 assert(M0 < 4 && "Indexing into an undef vector?");
2229 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2230 return false;
2231
2232 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2233 Swap = false;
2234 return true;
2235 }
2236
2237 // Ensure each word index of the ShuffleVector Mask is consecutive.
2238 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2239 return false;
2240
2241 if (IsLE) {
2242 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2243 // Input vectors don't need to be swapped if the leading element
2244 // of the result is one of the 3 left elements of the second vector
2245 // (or if there is no shift to be done at all).
2246 Swap = false;
2247 ShiftElts = (8 - M0) % 8;
2248 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2249 // Input vectors need to be swapped if the leading element
2250 // of the result is one of the 3 left elements of the first vector
2251 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2252 Swap = true;
2253 ShiftElts = (4 - M0) % 4;
2254 }
2255
2256 return true;
2257 } else { // BE
2258 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2259 // Input vectors don't need to be swapped if the leading element
2260 // of the result is one of the 4 elements of the first vector.
2261 Swap = false;
2262 ShiftElts = M0;
2263 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2264 // Input vectors need to be swapped if the leading element
2265 // of the result is one of the 4 elements of the right vector.
2266 Swap = true;
2267 ShiftElts = M0 - 4;
2268 }
2269
2270 return true;
2271 }
2272}
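// Illustrative note (not part of the original source): a worked example for
// the XXSLDWI matcher above. A shuffle whose word indices are <6, 7, 0, 1>
// has M0 = 6 and consecutive words modulo 8, so:
//   little-endian: Swap = false, ShiftElts = (8 - 6) % 8 = 2
//   big-endian:    Swap = true,  ShiftElts = 6 - 4       = 2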
2273
2274static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2275 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2276
2277 if (!isNByteElemShuffleMask(N, Width, -1))
2278 return false;
2279
2280 for (int i = 0; i < 16; i += Width)
2281 if (N->getMaskElt(i) != i + Width - 1)
2282 return false;
2283
2284 return true;
2285}
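// Illustrative note (not part of the original source): for Width = 4 the
// helper above accepts exactly the mask that reverses the bytes inside each
// word, i.e.
//   <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12>
// which is what XXBRW performs. A mask such as <7,6,5,4, 3,2,1,0, ...> is
// rejected because getMaskElt(0) must equal 0 + 4 - 1 = 3.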
2286
2290
2294
2298
2302
2303/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2304/// if the inputs to the instruction should be swapped and set \p DM to the
2305/// value for the immediate.
2306/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2307/// AND element 0 of the result comes from the first input (LE) or second input
2308/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2309/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2310/// mask.
2311bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2312 bool &Swap, bool IsLE) {
2313 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2314
2315 // Ensure each byte index of the double word is consecutive.
2316 if (!isNByteElemShuffleMask(N, 8, 1))
2317 return false;
2318
2319 unsigned M0 = N->getMaskElt(0) / 8;
2320 unsigned M1 = N->getMaskElt(8) / 8;
2321 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2322
2323 // If both vector operands for the shuffle are the same vector, the mask will
2324 // contain only elements from the first one and the second one will be undef.
2325 if (N->getOperand(1).isUndef()) {
2326 if ((M0 | M1) < 2) {
2327 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2328 Swap = false;
2329 return true;
2330 } else
2331 return false;
2332 }
2333
2334 if (IsLE) {
2335 if (M0 > 1 && M1 < 2) {
2336 Swap = false;
2337 } else if (M0 < 2 && M1 > 1) {
2338 M0 = (M0 + 2) % 4;
2339 M1 = (M1 + 2) % 4;
2340 Swap = true;
2341 } else
2342 return false;
2343
2344 // Note: if control flow comes here that means Swap is already set above
2345 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2346 return true;
2347 } else { // BE
2348 if (M0 < 2 && M1 > 1) {
2349 Swap = false;
2350 } else if (M0 > 1 && M1 < 2) {
2351 M0 = (M0 + 2) % 4;
2352 M1 = (M1 + 2) % 4;
2353 Swap = true;
2354 } else
2355 return false;
2356
2357 // Note: if control flow comes here that means Swap is already set above
2358 DM = (M0 << 1) + (M1 & 1);
2359 return true;
2360 }
2361}
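// Illustrative note (not part of the original source): a worked example of the
// DM computation above. Take doubleword indices M0 = 3, M1 = 0 (result
// doubleword 0 comes from doubleword 3 of the concatenated inputs, result
// doubleword 1 from doubleword 0):
//   little-endian: M0 > 1 && M1 < 2, so Swap = false and
//                  DM = (((~0) & 1) << 1) + ((~3) & 1) = 2 + 0 = 2
//   big-endian:    the inputs are swapped (Swap = true), M0 becomes 1 and M1
//                  becomes 2, giving DM = (1 << 1) + (2 & 1) = 2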
2362
2363
2364/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2365/// appropriate for PPC mnemonics (which have a big endian bias - namely
2366/// elements are counted from the left of the vector register).
2367unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2368 SelectionDAG &DAG) {
2369 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2370 assert(isSplatShuffleMask(SVOp, EltSize));
2371 EVT VT = SVOp->getValueType(0);
2372
2373 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2374 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2375 : SVOp->getMaskElt(0);
2376
2377 if (DAG.getDataLayout().isLittleEndian())
2378 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2379 else
2380 return SVOp->getMaskElt(0) / EltSize;
2381}
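// Illustrative note (not part of the original source): for a v4i32 splat of
// word 2 (EltSize = 4, getMaskElt(0) = 8), the index used in the mnemonic is
//   little-endian: (16 / 4) - 1 - (8 / 4) = 1
//   big-endian:     8 / 4                 = 2
// reflecting the big-endian (left-to-right) element numbering of the ISA.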
2382
2383/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2384/// by using a vspltis[bhw] instruction of the specified element size, return
2385/// the constant being splatted. The ByteSize field indicates the number of
2386/// bytes of each element [124] -> [bhw].
2387SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2388 SDValue OpVal;
2389
2390 // If ByteSize of the splat is bigger than the element size of the
2391 // build_vector, then we have a case where we are checking for a splat where
2392 // multiple elements of the buildvector are folded together into a single
2393 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2394 unsigned EltSize = 16/N->getNumOperands();
2395 if (EltSize < ByteSize) {
2396 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2397 SDValue UniquedVals[4];
2398 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2399
2400 // See if all of the elements in the buildvector agree across.
2401 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2402 if (N->getOperand(i).isUndef()) continue;
2403 // If the element isn't a constant, bail fully out.
2404 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2405
2406 if (!UniquedVals[i&(Multiple-1)].getNode())
2407 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2408 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2409 return SDValue(); // no match.
2410 }
2411
2412 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2413 // either constant or undef values that are identical for each chunk. See
2414 // if these chunks can form into a larger vspltis*.
2415
2416 // Check to see if all of the leading entries are either 0 or -1. If
2417 // neither, then this won't fit into the immediate field.
2418 bool LeadingZero = true;
2419 bool LeadingOnes = true;
2420 for (unsigned i = 0; i != Multiple-1; ++i) {
2421 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2422
2423 LeadingZero &= isNullConstant(UniquedVals[i]);
2424 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2425 }
2426 // Finally, check the least significant entry.
2427 if (LeadingZero) {
2428 if (!UniquedVals[Multiple-1].getNode())
2429 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2430 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2431 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2432 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2433 }
2434 if (LeadingOnes) {
2435 if (!UniquedVals[Multiple-1].getNode())
2436 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2437 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2438 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2439 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2440 }
2441
2442 return SDValue();
2443 }
2444
2445 // Check to see if this buildvec has a single non-undef value in its elements.
2446 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2447 if (N->getOperand(i).isUndef()) continue;
2448 if (!OpVal.getNode())
2449 OpVal = N->getOperand(i);
2450 else if (OpVal != N->getOperand(i))
2451 return SDValue();
2452 }
2453
2454 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2455
2456 unsigned ValSizeInBytes = EltSize;
2457 uint64_t Value = 0;
2458 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2459 Value = CN->getZExtValue();
2460 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2461 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2462 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2463 }
2464
2465 // If the splat value is larger than the element value, then we can never do
2466 // this splat. The only case that we could fit the replicated bits into our
2467 // immediate field for would be zero, and we prefer to use vxor for it.
2468 if (ValSizeInBytes < ByteSize) return SDValue();
2469
2470 // If the element value is larger than the splat value, check if it consists
2471 // of a repeated bit pattern of size ByteSize.
2472 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2473 return SDValue();
2474
2475 // Properly sign extend the value.
2476 int MaskVal = SignExtend32(Value, ByteSize * 8);
2477
2478 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2479 if (MaskVal == 0) return SDValue();
2480
2481 // Finally, if this value fits in a 5 bit sext field, return it
2482 if (SignExtend32<5>(MaskVal) == MaskVal)
2483 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2484 return SDValue();
2485}
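// Illustrative note (not part of the original source): two sketches of how
// get_VSPLTI_elt above resolves a splat, assuming ByteSize = 2 (vspltish):
//   * a v8i16 build_vector of eight copies of 0xFFFE: the value sign-extends
//     to -2, which fits in the 5-bit immediate, so the constant -2 is returned.
//   * a v16i8 build_vector {0, 5, 0, 5, ...}: the i8 elements are folded in
//     pairs into the 16-bit value 0x0005, and the constant 5 is returned.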
2486
2487//===----------------------------------------------------------------------===//
2488// Addressing Mode Selection
2489//===----------------------------------------------------------------------===//
2490
2491/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2492/// or 64-bit immediate, and if the value can be accurately represented as a
2493/// sign extension from a 16-bit value. If so, this returns true and the
2494/// immediate.
2495bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2496 if (!isa<ConstantSDNode>(N))
2497 return false;
2498
2499 Imm = (int16_t)N->getAsZExtVal();
2500 if (N->getValueType(0) == MVT::i32)
2501 return Imm == (int32_t)N->getAsZExtVal();
2502 else
2503 return Imm == (int64_t)N->getAsZExtVal();
2504}
2505bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2506 return isIntS16Immediate(Op.getNode(), Imm);
2507}
2508
2509/// Used when computing address flags for selecting loads and stores.
2510/// If we have an OR, check if the LHS and RHS are provably disjoint.
2511/// An OR of two provably disjoint values is equivalent to an ADD.
2512/// Most PPC load/store instructions compute the effective address as a sum,
2513/// so doing this conversion is useful.
2514static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2515 if (N.getOpcode() != ISD::OR)
2516 return false;
2517 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2518 if (!LHSKnown.Zero.getBoolValue())
2519 return false;
2520 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2521 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2522}
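// Illustrative note (not part of the original source): if the left operand is
// a pointer known to be 16-byte aligned (low four bits known zero) and the
// right operand is the constant 8 (every bit except bit 3 known zero), then
// every bit position is known zero on at least one side, so the OR can never
// produce a carry and is safely treated as an ADD when forming addresses.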
2523
2524/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2525/// be represented as an indexed [r+r] operation.
2526bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2527 SDValue &Index,
2528 SelectionDAG &DAG) const {
2529 for (SDNode *U : N->users()) {
2530 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2531 if (Memop->getMemoryVT() == MVT::f64) {
2532 Base = N.getOperand(0);
2533 Index = N.getOperand(1);
2534 return true;
2535 }
2536 }
2537 }
2538 return false;
2539}
2540
2541/// isIntS34Immediate - This method tests whether the given node's value can be
2542/// accurately represented as a sign extension from a 34-bit value. If so,
2543/// this returns true and the immediate.
2544bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2545 if (!isa<ConstantSDNode>(N))
2546 return false;
2547
2548 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2549 return isInt<34>(Imm);
2550}
2551bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2552 return isIntS34Immediate(Op.getNode(), Imm);
2553}
2554
2555/// SelectAddressRegReg - Given the specified address, check to see if it
2556/// can be represented as an indexed [r+r] operation. Returns false if it
2557/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2558/// non-zero and N can be represented by a base register plus a signed 16-bit
2559/// displacement, make a more precise judgement by checking (displacement % \p
2560/// EncodingAlignment).
2561bool PPCTargetLowering::SelectAddressRegReg(
2562 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2563 MaybeAlign EncodingAlignment) const {
2564 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2565 // a [pc+imm].
2567 return false;
2568
2569 int16_t Imm = 0;
2570 if (N.getOpcode() == ISD::ADD) {
2571 // Is this address used by an SPE load/store (f64)? SPE load/store cannot
2572 // handle a 16-bit offset; it only supports 8-bit offsets.
2573 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2574 return true;
2575 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2576 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2577 return false; // r+i
2578 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2579 return false; // r+i
2580
2581 Base = N.getOperand(0);
2582 Index = N.getOperand(1);
2583 return true;
2584 } else if (N.getOpcode() == ISD::OR) {
2585 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2586 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2587 return false; // Fold as r+i if we can.
2588
2589 // If this is an or of disjoint bitfields, we can codegen this as an add
2590 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2591 // disjoint.
2592 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2593
2594 if (LHSKnown.Zero.getBoolValue()) {
2595 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2596 // If all of the bits are known zero on the LHS or RHS, the add won't
2597 // carry.
2598 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2599 Base = N.getOperand(0);
2600 Index = N.getOperand(1);
2601 return true;
2602 }
2603 }
2604 }
2605
2606 return false;
2607}
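// Illustrative note (not part of the original source): with
// EncodingAlignment = 4, an address of the form (add %base, 24) is rejected
// here (returns false) because 24 is a 16-bit, 4-byte-aligned immediate and a
// D-form [r+imm] access is cheaper; an address (add %base, %index) whose
// second operand is not such an immediate returns true with Base and Index
// set for an X-form [r+r] access.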
2608
2609// If we happen to be doing an i64 load or store into a stack slot that has
2610// less than a 4-byte alignment, then the frame-index elimination may need to
2611// use an indexed load or store instruction (because the offset may not be a
2612// multiple of 4). The extra register needed to hold the offset comes from the
2613// register scavenger, and it is possible that the scavenger will need to use
2614// an emergency spill slot. As a result, we need to make sure that a spill slot
2615// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2616// stack slot.
2617static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2618 // FIXME: This does not handle the LWA case.
2619 if (VT != MVT::i64)
2620 return;
2621
2622 // NOTE: We'll exclude negative FIs here, which come from argument
2623 // lowering, because there are no known test cases triggering this problem
2624 // using packed structures (or similar). We can remove this exclusion if
2625 // we find such a test case. The reason why this is so test-case driven is
2626 // because this entire 'fixup' is only to prevent crashes (from the
2627 // register scavenger) on not-really-valid inputs. For example, if we have:
2628 // %a = alloca i1
2629 // %b = bitcast i1* %a to i64*
2630 // store i64 %v, i64* %b
2631 // then the store should really be marked as 'align 1', but is not. If it
2632 // were marked as 'align 1' then the indexed form would have been
2633 // instruction-selected initially, and the problem this 'fixup' is preventing
2634 // won't happen regardless.
2635 if (FrameIdx < 0)
2636 return;
2637
2639 MachineFrameInfo &MFI = MF.getFrameInfo();
2640
2641 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2642 return;
2643
2644 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2645 FuncInfo->setHasNonRISpills();
2646}
2647
2648/// Returns true if the address N can be represented by a base register plus
2649/// a signed 16-bit displacement [r+imm], and if it is not better
2650/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2651/// displacements that are multiples of that value.
2652bool PPCTargetLowering::SelectAddressRegImm(
2653 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2654 MaybeAlign EncodingAlignment) const {
2655 // FIXME dl should come from parent load or store, not from address
2656 SDLoc dl(N);
2657
2658 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2659 // a [pc+imm].
2661 return false;
2662
2663 // If this can be more profitably realized as r+r, fail.
2664 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2665 return false;
2666
2667 if (N.getOpcode() == ISD::ADD) {
2668 int16_t imm = 0;
2669 if (isIntS16Immediate(N.getOperand(1), imm) &&
2670 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2671 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2672 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2673 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2674 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2675 } else {
2676 Base = N.getOperand(0);
2677 }
2678 return true; // [r+i]
2679 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2680 // Match LOAD (ADD (X, Lo(G))).
2681 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2682 "Cannot handle constant offsets yet!");
2683 Disp = N.getOperand(1).getOperand(0); // The global address.
2688 Base = N.getOperand(0);
2689 return true; // [&g+r]
2690 }
2691 } else if (N.getOpcode() == ISD::OR) {
2692 int16_t imm = 0;
2693 if (isIntS16Immediate(N.getOperand(1), imm) &&
2694 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2695 // If this is an or of disjoint bitfields, we can codegen this as an add
2696 // (for better address arithmetic) if the LHS and RHS of the OR are
2697 // provably disjoint.
2698 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2699
2700 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2701 // If all of the bits are known zero on the LHS or RHS, the add won't
2702 // carry.
2703 if (FrameIndexSDNode *FI =
2704 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2705 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2706 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2707 } else {
2708 Base = N.getOperand(0);
2709 }
2710 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2711 return true;
2712 }
2713 }
2714 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2715 // Loading from a constant address.
2716
2717 // If this address fits entirely in a 16-bit sext immediate field, codegen
2718 // this as "d, 0"
2719 int16_t Imm;
2720 if (isIntS16Immediate(CN, Imm) &&
2721 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2722 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2723 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2724 CN->getValueType(0));
2725 return true;
2726 }
2727
2728 // Handle 32-bit sext immediates with LIS + addr mode.
2729 if ((CN->getValueType(0) == MVT::i32 ||
2730 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2731 (!EncodingAlignment ||
2732 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2733 int Addr = (int)CN->getZExtValue();
2734
2735 // Otherwise, break this down into an LIS + disp.
2736 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2737
2738 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2739 MVT::i32);
2740 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2741 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2742 return true;
2743 }
2744 }
2745
2746 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2748 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2749 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2750 } else
2751 Base = N;
2752 return true; // [r+0]
2753}
2754
2755/// Similar to the 16-bit case but for instructions that take a 34-bit
2756/// displacement field (prefixed loads/stores).
2757bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2758 SDValue &Base,
2759 SelectionDAG &DAG) const {
2760 // Only on 64-bit targets.
2761 if (N.getValueType() != MVT::i64)
2762 return false;
2763
2764 SDLoc dl(N);
2765 int64_t Imm = 0;
2766
2767 if (N.getOpcode() == ISD::ADD) {
2768 if (!isIntS34Immediate(N.getOperand(1), Imm))
2769 return false;
2770 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2771 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2772 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2773 else
2774 Base = N.getOperand(0);
2775 return true;
2776 }
2777
2778 if (N.getOpcode() == ISD::OR) {
2779 if (!isIntS34Immediate(N.getOperand(1), Imm))
2780 return false;
2781 // If this is an or of disjoint bitfields, we can codegen this as an add
2782 // (for better address arithmetic) if the LHS and RHS of the OR are
2783 // provably disjoint.
2784 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2785 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2786 return false;
2787 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2788 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2789 else
2790 Base = N.getOperand(0);
2791 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2792 return true;
2793 }
2794
2795 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2796 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2797 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2798 return true;
2799 }
2800
2801 return false;
2802}
2803
2804/// SelectAddressRegRegOnly - Given the specified address, force it to be
2805/// represented as an indexed [r+r] operation.
2806bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2807 SDValue &Index,
2808 SelectionDAG &DAG) const {
2809 // Check to see if we can easily represent this as an [r+r] address. This
2810 // will fail if it thinks that the address is more profitably represented as
2811 // reg+imm, e.g. where imm = 0.
2812 if (SelectAddressRegReg(N, Base, Index, DAG))
2813 return true;
2814
2815 // If the address is the result of an add, we will utilize the fact that the
2816 // address calculation includes an implicit add. However, we can reduce
2817 // register pressure if we do not materialize a constant just for use as the
2818 // index register. We only get rid of the add if it is not an add of a
2819 // value and a 16-bit signed constant where both operands have a single use.
2820 int16_t imm = 0;
2821 if (N.getOpcode() == ISD::ADD &&
2822 (!isIntS16Immediate(N.getOperand(1), imm) ||
2823 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2824 Base = N.getOperand(0);
2825 Index = N.getOperand(1);
2826 return true;
2827 }
2828
2829 // Otherwise, do it the hard way, using R0 as the base register.
2830 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2831 N.getValueType());
2832 Index = N;
2833 return true;
2834}
2835
2836template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2837 Ty *PCRelCand = dyn_cast<Ty>(N);
2838 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2839}
2840
2841/// Returns true if this address is a PC Relative address.
2842/// An address is PC Relative either if it is marked with the flag
2843/// PPCII::MO_PCREL_FLAG or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2845 // This is a materialize PC Relative node. Always select this as PC Relative.
2846 Base = N;
2847 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2848 return true;
2853 return true;
2854 return false;
2855}
2856
2857/// Returns true if we should use a direct load into vector instruction
2858/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2859static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2860
2861 // If there are any uses other than scalar_to_vector, then we should
2862 // keep it as a scalar load -> direct move pattern to prevent multiple
2863 // loads.
2864 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2865 if (!LD)
2866 return false;
2867
2868 EVT MemVT = LD->getMemoryVT();
2869 if (!MemVT.isSimple())
2870 return false;
2871 switch(MemVT.getSimpleVT().SimpleTy) {
2872 case MVT::i64:
2873 break;
2874 case MVT::i32:
2875 if (!ST.hasP8Vector())
2876 return false;
2877 break;
2878 case MVT::i16:
2879 case MVT::i8:
2880 if (!ST.hasP9Vector())
2881 return false;
2882 break;
2883 default:
2884 return false;
2885 }
2886
2887 SDValue LoadedVal(N, 0);
2888 if (!LoadedVal.hasOneUse())
2889 return false;
2890
2891 for (SDUse &Use : LD->uses())
2892 if (Use.getResNo() == 0 &&
2893 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2894 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2895 return false;
2896
2897 return true;
2898}
2899
2900/// getPreIndexedAddressParts - returns true, and sets the base pointer, offset
2901/// pointer, and addressing mode by reference, if the node's address can be
2902/// legally represented as a pre-indexed load / store address.
2903bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2904 SDValue &Offset,
2905 ISD::MemIndexedMode &AM,
2906 SelectionDAG &DAG) const {
2907 if (DisablePPCPreinc) return false;
2908
2909 bool isLoad = true;
2910 SDValue Ptr;
2911 EVT VT;
2912 Align Alignment;
2913 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2914 Ptr = LD->getBasePtr();
2915 VT = LD->getMemoryVT();
2916 Alignment = LD->getAlign();
2917 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2918 Ptr = ST->getBasePtr();
2919 VT = ST->getMemoryVT();
2920 Alignment = ST->getAlign();
2921 isLoad = false;
2922 } else
2923 return false;
2924
2925 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2926 // instructions because we can fold these into a more efficient instruction
2927 // (such as LXSD) instead.
2928 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2929 return false;
2930 }
2931
2932 // PowerPC doesn't have preinc load/store instructions for vectors
2933 if (VT.isVector())
2934 return false;
2935
2936 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2937 // Common code will reject creating a pre-inc form if the base pointer
2938 // is a frame index, or if N is a store and the base pointer is either
2939 // the same as or a predecessor of the value being stored. Check for
2940 // those situations here, and try with swapped Base/Offset instead.
2941 bool Swap = false;
2942
2944 Swap = true;
2945 else if (!isLoad) {
2946 SDValue Val = cast<StoreSDNode>(N)->getValue();
2947 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2948 Swap = true;
2949 }
2950
2951 if (Swap)
2952 std::swap(Base, Offset);
2953
2954 AM = ISD::PRE_INC;
2955 return true;
2956 }
2957
2958 // LDU/STU can only handle immediates that are a multiple of 4.
2959 if (VT != MVT::i64) {
2960 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
2961 return false;
2962 } else {
2963 // LDU/STU need an address with at least 4-byte alignment.
2964 if (Alignment < Align(4))
2965 return false;
2966
2967 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2968 return false;
2969 }
2970
2971 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2972 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2973 // sext i32 to i64 when addr mode is r+i.
2974 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2975 LD->getExtensionType() == ISD::SEXTLOAD &&
2977 return false;
2978 }
2979
2980 AM = ISD::PRE_INC;
2981 return true;
2982}
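// Illustrative note (not part of the original source): a hypothetical load
//   %v = load i32, ptr %p   ; where %p = %base + 16
// is accepted above with Base = %base, Offset = 16 and AM = ISD::PRE_INC, so
// it can be selected as an update-form instruction (e.g. lwzu) that also
// produces the incremented pointer. For i64 accesses the displacement must
// additionally be a multiple of 4 and the address at least 4-byte aligned
// (ldu/stdu restriction).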
2983
2984//===----------------------------------------------------------------------===//
2985// LowerOperation implementation
2986//===----------------------------------------------------------------------===//
2987
2988/// Set HiOpFlags and LoOpFlags to the target MO flags used for label
2989/// references, selecting PIC-specific flags when generating PIC code.
2990static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2991 unsigned &HiOpFlags, unsigned &LoOpFlags,
2992 const GlobalValue *GV = nullptr) {
2993 HiOpFlags = PPCII::MO_HA;
2994 LoOpFlags = PPCII::MO_LO;
2995
2996 // Don't use the pic base if not in PIC relocation model.
2997 if (IsPIC) {
2998 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
2999 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3000 }
3001}
3002
3003static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3004 SelectionDAG &DAG) {
3005 SDLoc DL(HiPart);
3006 EVT PtrVT = HiPart.getValueType();
3007 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3008
3009 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3010 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3011
3012 // With PIC, the first instruction is actually "GR+hi(&G)".
3013 if (isPIC)
3014 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3015 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3016
3017 // Generate non-pic code that has direct accesses to the constant pool.
3018 // The address of the global is just (hi(&g)+lo(&g)).
3019 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3020}
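// Illustrative note (not part of the original source): for a non-PIC global G
// the Hi/Lo pair built above corresponds roughly to the familiar sequence
//   lis   r3, G@ha
//   addi  r3, r3, G@l
// while in PIC mode the high part is first added to the value produced by
// PPCISD::GlobalBaseReg.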
3021
3023 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3024 FuncInfo->setUsesTOCBasePtr();
3025}
3026
3030
3031SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3032 SDValue GA) const {
3033 EVT VT = Subtarget.getScalarIntVT();
3034 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3035 : Subtarget.isAIXABI()
3036 ? DAG.getRegister(PPC::R2, VT)
3037 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3038 SDValue Ops[] = { GA, Reg };
3039 return DAG.getMemIntrinsicNode(
3040 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3043}
3044
3045SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3046 SelectionDAG &DAG) const {
3047 EVT PtrVT = Op.getValueType();
3048 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3049 const Constant *C = CP->getConstVal();
3050
3051 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3052 // The actual address of the GlobalValue is stored in the TOC.
3053 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3054 if (Subtarget.isUsingPCRelativeCalls()) {
3055 SDLoc DL(CP);
3056 EVT Ty = getPointerTy(DAG.getDataLayout());
3057 SDValue ConstPool = DAG.getTargetConstantPool(
3058 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3059 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3060 }
3061 setUsesTOCBasePtr(DAG);
3062 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3063 return getTOCEntry(DAG, SDLoc(CP), GA);
3064 }
3065
3066 unsigned MOHiFlag, MOLoFlag;
3067 bool IsPIC = isPositionIndependent();
3068 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3069
3070 if (IsPIC && Subtarget.isSVR4ABI()) {
3071 SDValue GA =
3073 return getTOCEntry(DAG, SDLoc(CP), GA);
3074 }
3075
3076 SDValue CPIHi =
3077 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3078 SDValue CPILo =
3079 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3080 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3081}
3082
3083// For 64-bit PowerPC, prefer the more compact relative encodings.
3084// This trades 32 bits per jump table entry for one or two instructions
3085// at the jump site.
3092
3095 return false;
3096 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3097 return true;
3099}
3100
3102 SelectionDAG &DAG) const {
3103 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3105
3106 switch (getTargetMachine().getCodeModel()) {
3107 case CodeModel::Small:
3108 case CodeModel::Medium:
3110 default:
3111 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3113 }
3114}
3115
3116const MCExpr *
3118 unsigned JTI,
3119 MCContext &Ctx) const {
3120 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3122
3123 switch (getTargetMachine().getCodeModel()) {
3124 case CodeModel::Small:
3125 case CodeModel::Medium:
3127 default:
3128 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3129 }
3130}
3131
3132SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3133 EVT PtrVT = Op.getValueType();
3135
3136 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3137 if (Subtarget.isUsingPCRelativeCalls()) {
3138 SDLoc DL(JT);
3139 EVT Ty = getPointerTy(DAG.getDataLayout());
3140 SDValue GA =
3142 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3143 return MatAddr;
3144 }
3145
3146 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3147 // The actual address of the GlobalValue is stored in the TOC.
3148 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3149 setUsesTOCBasePtr(DAG);
3150 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3151 return getTOCEntry(DAG, SDLoc(JT), GA);
3152 }
3153
3154 unsigned MOHiFlag, MOLoFlag;
3155 bool IsPIC = isPositionIndependent();
3156 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3157
3158 if (IsPIC && Subtarget.isSVR4ABI()) {
3159 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3161 return getTOCEntry(DAG, SDLoc(GA), GA);
3162 }
3163
3164 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3165 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3166 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3167}
3168
3169SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3170 SelectionDAG &DAG) const {
3171 EVT PtrVT = Op.getValueType();
3172 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3173 const BlockAddress *BA = BASDN->getBlockAddress();
3174
3175 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3176 if (Subtarget.isUsingPCRelativeCalls()) {
3177 SDLoc DL(BASDN);
3178 EVT Ty = getPointerTy(DAG.getDataLayout());
3179 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3181 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3182 return MatAddr;
3183 }
3184
3185 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3186 // The actual BlockAddress is stored in the TOC.
3187 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3188 setUsesTOCBasePtr(DAG);
3189 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3190 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3191 }
3192
3193 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3194 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3195 return getTOCEntry(
3196 DAG, SDLoc(BASDN),
3197 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3198
3199 unsigned MOHiFlag, MOLoFlag;
3200 bool IsPIC = isPositionIndependent();
3201 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3202 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3203 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3204 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3205}
3206
3207SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3208 SelectionDAG &DAG) const {
3209 if (Subtarget.isAIXABI())
3210 return LowerGlobalTLSAddressAIX(Op, DAG);
3211
3212 return LowerGlobalTLSAddressLinux(Op, DAG);
3213}
3214
3215/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3216/// and then apply the update.
3218 SelectionDAG &DAG,
3219 const TargetMachine &TM) {
3220 // Initialize TLS model opt setting lazily:
3221 // (1) Use initial-exec for single TLS var references within current function.
3222 // (2) Use local-dynamic for multiple TLS var references within current
3223 // function.
3224 PPCFunctionInfo *FuncInfo =
3226 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3228 // Iterate over all instructions within current function, collect all TLS
3229 // global variables (global variables taken as the first parameter to
3230 // Intrinsic::threadlocal_address).
3231 const Function &Func = DAG.getMachineFunction().getFunction();
3232 for (const BasicBlock &BB : Func)
3233 for (const Instruction &I : BB)
3234 if (I.getOpcode() == Instruction::Call)
3235 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3236 if (Function *CF = CI->getCalledFunction())
3237 if (CF->isDeclaration() &&
3238 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3239 if (const GlobalValue *GV =
3240 dyn_cast<GlobalValue>(I.getOperand(0))) {
3241 TLSModel::Model GVModel = TM.getTLSModel(GV);
3242 if (GVModel == TLSModel::LocalDynamic)
3243 TLSGV.insert(GV);
3244 }
3245
3246 unsigned TLSGVCnt = TLSGV.size();
3247 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3248 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3249 FuncInfo->setAIXFuncUseTLSIEForLD();
3251 }
3252
3253 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3254 LLVM_DEBUG(
3255 dbgs() << DAG.getMachineFunction().getName()
3256 << " function is using the TLS-IE model for TLS-LD access.\n");
3257 Model = TLSModel::InitialExec;
3258 }
3259}
3260
3261SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3262 SelectionDAG &DAG) const {
3263 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3264
3265 if (DAG.getTarget().useEmulatedTLS())
3266 report_fatal_error("Emulated TLS is not yet supported on AIX");
3267
3268 SDLoc dl(GA);
3269 const GlobalValue *GV = GA->getGlobal();
3270 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3271 bool Is64Bit = Subtarget.isPPC64();
3273
3274 // Apply update to the TLS model.
3275 if (Subtarget.hasAIXShLibTLSModelOpt())
3277
3278 // TLS variables are accessed through TOC entries.
3279 // To support this, set the DAG to use the TOC base pointer.
3280 setUsesTOCBasePtr(DAG);
3281
3282 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3283
3284 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3285 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3286 bool HasAIXSmallTLSGlobalAttr = false;
3287 SDValue VariableOffsetTGA =
3288 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3289 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3290 SDValue TLSReg;
3291
3292 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3293 if (GVar->hasAttribute("aix-small-tls"))
3294 HasAIXSmallTLSGlobalAttr = true;
3295
3296 if (Is64Bit) {
3297 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3298 // involves a load of the variable offset (from the TOC), followed by an
3299 // add of the loaded variable offset to R13 (the thread pointer).
3300 // This code sequence looks like:
3301 // ld reg1,var[TC](2)
3302 // add reg2, reg1, r13 // r13 contains the thread pointer
3303 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3304
3305 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3306 // global variable attribute, produce a faster access sequence for
3307 // local-exec TLS variables where the offset from the TLS base is encoded
3308 // as an immediate operand.
3309 //
3310 // We only utilize the faster local-exec access sequence when the TLS
3311 // variable has a size within the policy limit. We treat types that are
3312 // not sized or are empty as being over the policy size limit.
3313 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3314 IsTLSLocalExecModel) {
3315 Type *GVType = GV->getValueType();
3316 if (GVType->isSized() && !GVType->isEmptyTy() &&
3317 GV->getDataLayout().getTypeAllocSize(GVType) <=
3319 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3320 }
3321 } else {
3322 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3323 // involves loading the variable offset from the TOC, generating a call to
3324 // .__get_tpointer to get the thread pointer (which will be in R3), and
3325 // adding the two together:
3326 // lwz reg1,var[TC](2)
3327 // bla .__get_tpointer
3328 // add reg2, reg1, r3
3329 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3330
3331 // We do not implement the 32-bit version of the faster access sequence
3332 // for local-exec that is controlled by the -maix-small-local-exec-tls
3333 // option, or the "aix-small-tls" global variable attribute.
3334 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3335 report_fatal_error("The small-local-exec TLS access sequence is "
3336 "currently only supported on AIX (64-bit mode).");
3337 }
3338 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3339 }
3340
3341 if (Model == TLSModel::LocalDynamic) {
3342 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3343
3344 // We do not implement the 32-bit version of the faster access sequence
3345 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3346 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3347 report_fatal_error("The small-local-dynamic TLS access sequence is "
3348 "currently only supported on AIX (64-bit mode).");
3349
3350 // For local-dynamic on AIX, we need to generate one TOC entry for each
3351 // variable offset, and a single module-handle TOC entry for the entire
3352 // file.
3353
3354 SDValue VariableOffsetTGA =
3355 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3356 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3357
3359 GlobalVariable *TLSGV =
3360 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3361 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3363 assert(TLSGV && "Not able to create GV for _$TLSML.");
3364 SDValue ModuleHandleTGA =
3365 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3366 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3367 SDValue ModuleHandle =
3368 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3369
3370 // With the -maix-small-local-dynamic-tls option, produce a faster access
3371 // sequence for local-dynamic TLS variables where the offset from the
3372 // module-handle is encoded as an immediate operand.
3373 //
3374 // We only utilize the faster local-dynamic access sequence when the TLS
3375 // variable has a size within the policy limit. We treat types that are
3376 // not sized or are empty as being over the policy size limit.
3377 if (HasAIXSmallLocalDynamicTLS) {
3378 Type *GVType = GV->getValueType();
3379 if (GVType->isSized() && !GVType->isEmptyTy() &&
3380 GV->getDataLayout().getTypeAllocSize(GVType) <=
3382 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3383 ModuleHandle);
3384 }
3385
3386 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3387 }
3388
3389 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3390 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3391 // need to generate two TOC entries, one for the variable offset, one for the
3392 // region handle. The global address for the TOC entry of the region handle is
3393 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3394 // entry of the variable offset is created with MO_TLSGD_FLAG.
3395 SDValue VariableOffsetTGA =
3396 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3397 SDValue RegionHandleTGA =
3398 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3399 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3400 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3401 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3402 RegionHandle);
3403}
3404
3405SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3406 SelectionDAG &DAG) const {
3407 // FIXME: TLS addresses currently use medium model code sequences,
3408 // which is the most useful form. Eventually support for small and
3409 // large models could be added if users need it, at the cost of
3410 // additional complexity.
3411 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3412 if (DAG.getTarget().useEmulatedTLS())
3413 return LowerToTLSEmulatedModel(GA, DAG);
3414
3415 SDLoc dl(GA);
3416 const GlobalValue *GV = GA->getGlobal();
3417 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3418 bool is64bit = Subtarget.isPPC64();
3419 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3420 PICLevel::Level picLevel = M->getPICLevel();
3421
3422 const TargetMachine &TM = getTargetMachine();
3423 TLSModel::Model Model = TM.getTLSModel(GV);
3424
3425 if (Model == TLSModel::LocalExec) {
3426 if (Subtarget.isUsingPCRelativeCalls()) {
3427 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3428 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3430 SDValue MatAddr =
3431 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3432 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3433 }
3434
3435 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3437 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3439 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3440 : DAG.getRegister(PPC::R2, MVT::i32);
3441
3442 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3443 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3444 }
3445
3446 if (Model == TLSModel::InitialExec) {
3447 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3449 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3450 SDValue TGATLS = DAG.getTargetGlobalAddress(
3451 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3452 SDValue TPOffset;
3453 if (IsPCRel) {
3454 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3455 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3456 MachinePointerInfo());
3457 } else {
3458 SDValue GOTPtr;
3459 if (is64bit) {
3460 setUsesTOCBasePtr(DAG);
3461 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3462 GOTPtr =
3463 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3464 } else {
3465 if (!TM.isPositionIndependent())
3466 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3467 else if (picLevel == PICLevel::SmallPIC)
3468 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3469 else
3470 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3471 }
3472 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3473 }
3474 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3475 }
3476
3477 if (Model == TLSModel::GeneralDynamic) {
3478 if (Subtarget.isUsingPCRelativeCalls()) {
3479 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3481 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3482 }
3483
3484 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3485 SDValue GOTPtr;
3486 if (is64bit) {
3487 setUsesTOCBasePtr(DAG);
3488 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3489 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3490 GOTReg, TGA);
3491 } else {
3492 if (picLevel == PICLevel::SmallPIC)
3493 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3494 else
3495 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3496 }
3497 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3498 GOTPtr, TGA, TGA);
3499 }
3500
3501 if (Model == TLSModel::LocalDynamic) {
3502 if (Subtarget.isUsingPCRelativeCalls()) {
3503 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3505 SDValue MatPCRel =
3506 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3507 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3508 }
3509
3510 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3511 SDValue GOTPtr;
3512 if (is64bit) {
3513 setUsesTOCBasePtr(DAG);
3514 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3515 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3516 GOTReg, TGA);
3517 } else {
3518 if (picLevel == PICLevel::SmallPIC)
3519 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3520 else
3521 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3522 }
3523 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3524 PtrVT, GOTPtr, TGA, TGA);
3525 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3526 PtrVT, TLSAddr, TGA);
3527 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3528 }
3529
3530 llvm_unreachable("Unknown TLS model!");
3531}
3532
3533SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3534 SelectionDAG &DAG) const {
3535 EVT PtrVT = Op.getValueType();
3536 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3537 SDLoc DL(GSDN);
3538 const GlobalValue *GV = GSDN->getGlobal();
3539
3540 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3541 // The actual address of the GlobalValue is stored in the TOC.
3542 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3543 if (Subtarget.isUsingPCRelativeCalls()) {
3544 EVT Ty = getPointerTy(DAG.getDataLayout());
3546 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3548 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3549 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3550 MachinePointerInfo());
3551 return Load;
3552 } else {
3553 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3555 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3556 }
3557 }
3558 setUsesTOCBasePtr(DAG);
3559 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3560 return getTOCEntry(DAG, DL, GA);
3561 }
3562
3563 unsigned MOHiFlag, MOLoFlag;
3564 bool IsPIC = isPositionIndependent();
3565 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3566
3567 if (IsPIC && Subtarget.isSVR4ABI()) {
3568 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3569 GSDN->getOffset(),
3571 return getTOCEntry(DAG, DL, GA);
3572 }
3573
3574 SDValue GAHi =
3575 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3576 SDValue GALo =
3577 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3578
3579 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3580}
3581
3582SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3583 bool IsStrict = Op->isStrictFPOpcode();
3584 ISD::CondCode CC =
3585 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3586 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3587 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3588 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3589 EVT LHSVT = LHS.getValueType();
3590 SDLoc dl(Op);
3591
3592 // Soften the setcc with libcall if it is fp128.
3593 if (LHSVT == MVT::f128) {
3594 assert(!Subtarget.hasP9Vector() &&
3595 "SETCC for f128 is already legal under Power9!");
3596 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3597 Op->getOpcode() == ISD::STRICT_FSETCCS);
3598 if (RHS.getNode())
3599 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3600 DAG.getCondCode(CC));
3601 if (IsStrict)
3602 return DAG.getMergeValues({LHS, Chain}, dl);
3603 return LHS;
3604 }
3605
3606 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3607
3608 if (Op.getValueType() == MVT::v2i64) {
3609 // When the operands themselves are v2i64 values, we need to do something
3610 // special because VSX has no underlying comparison operations for these.
3611 if (LHS.getValueType() == MVT::v2i64) {
3612 // Equality can be handled by casting to the legal type for Altivec
3613 // comparisons, everything else needs to be expanded.
3614 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3615 return SDValue();
3616 SDValue SetCC32 = DAG.getSetCC(
3617 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3618 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3619 int ShuffV[] = {1, 0, 3, 2};
3620 SDValue Shuff =
3621 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3622 return DAG.getBitcast(MVT::v2i64,
3623 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3624 dl, MVT::v4i32, Shuff, SetCC32));
3625 }
3626
3627 // We handle most of these in the usual way.
3628 return Op;
3629 }
3630
3631 // If we're comparing for equality to zero, expose the fact that this is
3632 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3633 // fold the new nodes.
3634 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3635 return V;
3636
3637 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3638 // Leave comparisons against 0 and -1 alone for now, since they're usually
3639 // optimized. FIXME: revisit this when we can custom lower all setcc
3640 // optimizations.
3641 if (C->isAllOnes() || C->isZero())
3642 return SDValue();
3643 }
3644
3645 // If we have an integer seteq/setne, turn it into a compare against zero
3646 // by xor'ing the rhs with the lhs, which is faster than setting a
3647 // condition register, reading it back out, and masking the correct bit. The
3648 // normal approach here uses sub to do this instead of xor. Using xor exposes
3649 // the result to other bit-twiddling opportunities.
3650 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3651 EVT VT = Op.getValueType();
3652 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3653 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3654 }
3655 return SDValue();
3656}
3657
3658SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3659 SDNode *Node = Op.getNode();
3660 EVT VT = Node->getValueType(0);
3661 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3662 SDValue InChain = Node->getOperand(0);
3663 SDValue VAListPtr = Node->getOperand(1);
3664 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3665 SDLoc dl(Node);
3666
3667 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3668
3669 // gpr_index
3670 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3671 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3672 InChain = GprIndex.getValue(1);
3673
3674 if (VT == MVT::i64) {
3675 // Check if GprIndex is even
3676 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3677 DAG.getConstant(1, dl, MVT::i32));
3678 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3679 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3680 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3681 DAG.getConstant(1, dl, MVT::i32));
3682 // Align GprIndex to be even if it isn't
3683 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3684 GprIndex);
3685 }
3686
3687 // fpr index is 1 byte after gpr
3688 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3689 DAG.getConstant(1, dl, MVT::i32));
3690
3691 // fpr
3692 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3693 FprPtr, MachinePointerInfo(SV), MVT::i8);
3694 InChain = FprIndex.getValue(1);
3695
3696 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3697 DAG.getConstant(8, dl, MVT::i32));
3698
3699 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3700 DAG.getConstant(4, dl, MVT::i32));
3701
3702 // areas
3703 SDValue OverflowArea =
3704 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3705 InChain = OverflowArea.getValue(1);
3706
3707 SDValue RegSaveArea =
3708 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3709 InChain = RegSaveArea.getValue(1);
3710
3711 // select overflow_area if index >= 8
3712 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3713 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3714
3715 // adjustment constant gpr_index * 4/8
3716 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3717 VT.isInteger() ? GprIndex : FprIndex,
3718 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3719 MVT::i32));
3720
3721 // OurReg = RegSaveArea + RegConstant
3722 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3723 RegConstant);
3724
3725 // Floating types are 32 bytes into RegSaveArea
3726 if (VT.isFloatingPoint())
3727 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3728 DAG.getConstant(32, dl, MVT::i32));
3729
3730 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3731 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3732 VT.isInteger() ? GprIndex : FprIndex,
3733 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3734 MVT::i32));
3735
3736 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3737 VT.isInteger() ? VAListPtr : FprPtr,
3738 MachinePointerInfo(SV), MVT::i8);
3739
3740 // determine if we should load from reg_save_area or overflow_area
3741 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3742
3743 // increase overflow_area by 4/8 if gpr/fpr >= 8
3744 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3745 DAG.getConstant(VT.isInteger() ? 4 : 8,
3746 dl, MVT::i32));
3747
3748 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3749 OverflowAreaPlusN);
3750
3751 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3752 MachinePointerInfo(), MVT::i32);
3753
3754 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3755}
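// Illustrative note (not part of the original source): the 32-bit SVR4 va_list
// layout assumed by the lowering above is
//   byte 0: i8 gpr_index         byte 4: ptr overflow_area
//   byte 1: i8 fpr_index         byte 8: ptr reg_save_area
// with the eight GPR save slots (8 x 4 bytes) at the start of the register
// save area, which is why floating-point values add 32 to reach the FPR slots.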
3756
3757SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3758 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3759
3760 // We have to copy the entire va_list struct:
3761 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3762 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3763 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3764 false, true, /*CI=*/nullptr, std::nullopt,
3765 MachinePointerInfo(), MachinePointerInfo());
3766}
3767
3768SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3769 SelectionDAG &DAG) const {
3770 return Op.getOperand(0);
3771}
3772
3773SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3774 MachineFunction &MF = DAG.getMachineFunction();
3775 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3776
3777 assert((Op.getOpcode() == ISD::INLINEASM ||
3778 Op.getOpcode() == ISD::INLINEASM_BR) &&
3779 "Expecting Inline ASM node.");
3780
3781 // If an LR store is already known to be required then there is no point in
3782 // checking this ASM as well.
3783 if (MFI.isLRStoreRequired())
3784 return Op;
3785
3786 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3787 // type MVT::Glue. We want to ignore this last operand if that is the case.
3788 unsigned NumOps = Op.getNumOperands();
3789 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3790 --NumOps;
3791
3792 // Check all operands that may contain the LR.
3793 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3794 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3795 unsigned NumVals = Flags.getNumOperandRegisters();
3796 ++i; // Skip the ID value.
3797
3798 switch (Flags.getKind()) {
3799 default:
3800 llvm_unreachable("Bad flags!");
3801 case InlineAsm::Kind::RegUse:
3802 case InlineAsm::Kind::Imm:
3803 case InlineAsm::Kind::Mem:
3804 i += NumVals;
3805 break;
3806 case InlineAsm::Kind::Clobber:
3807 case InlineAsm::Kind::RegDef:
3808 case InlineAsm::Kind::RegDefEarlyClobber: {
3809 for (; NumVals; --NumVals, ++i) {
3810 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3811 if (Reg != PPC::LR && Reg != PPC::LR8)
3812 continue;
3813 MFI.setLRStoreRequired();
3814 return Op;
3815 }
3816 break;
3817 }
3818 }
3819 }
3820
3821 return Op;
3822}
3823
3824SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3825 SelectionDAG &DAG) const {
3826 SDValue Chain = Op.getOperand(0);
3827 SDValue Trmp = Op.getOperand(1); // trampoline
3828 SDValue FPtr = Op.getOperand(2); // nested function
3829 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3830 SDLoc dl(Op);
3831
3832 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3833
3834 if (Subtarget.isAIXABI()) {
3835 // On AIX we create a trampoline descriptor by combining the
3836 // entry point and TOC from the global descriptor (FPtr) with the
3837 // nest argument as the environment pointer.
3838 uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
3839 MaybeAlign PointerAlign(PointerSize);
3840 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
3841 ? (MachineMemOperand::MODereferenceable |
3842 MachineMemOperand::MOInvariant)
3843 : MachineMemOperand::MONone;
3844
3845 uint64_t TOCPointerOffset = 1 * PointerSize;
3846 uint64_t EnvPointerOffset = 2 * PointerSize;
3847 SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
3848 SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
3849
3850 const Value *TrampolineAddr =
3851 cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3852 const Function *Func =
3853 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
3854
3855 SDValue OutChains[3];
3856
3857 // Copy the entry point address from the global descriptor to the
3858 // trampoline buffer.
3859 SDValue LoadEntryPoint =
3860 DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
3861 PointerAlign, MMOFlags);
3862 SDValue EPLoadChain = LoadEntryPoint.getValue(1);
3863 OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
3864 MachinePointerInfo(TrampolineAddr, 0));
3865
3866 // Copy the TOC pointer from the global descriptor to the trampoline
3867 // buffer.
3868 SDValue TOCFromDescriptorPtr =
3869 DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
3870 SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
3871 MachinePointerInfo(Func, TOCPointerOffset),
3872 PointerAlign, MMOFlags);
3873 SDValue TrampolineTOCPointer =
3874 DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
3875 SDValue TOCLoadChain = TOCReg.getValue(1);
3876 OutChains[1] =
3877 DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
3878 MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
3879
3880 // Store the nest argument into the environment pointer in the trampoline
3881 // buffer.
3882 SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
3883 OutChains[2] =
3884 DAG.getStore(Chain, dl, Nest, EnvPointer,
3885 MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
3886
3887 SDValue TokenFactor =
3888 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
3889 return TokenFactor;
3890 }
3891
3892 bool isPPC64 = (PtrVT == MVT::i64);
3893 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3894
3895 TargetLowering::ArgListTy Args;
3896 Args.emplace_back(Trmp, IntPtrTy);
3897 // TrampSize == (isPPC64 ? 48 : 40);
3898 Args.emplace_back(
3899 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
3900 IntPtrTy);
3901 Args.emplace_back(FPtr, IntPtrTy);
3902 Args.emplace_back(Nest, IntPtrTy);
3903
3904 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3905 TargetLowering::CallLoweringInfo CLI(DAG);
3906 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3907 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3908 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3909
3910 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3911 return CallResult.second;
3912}
3913
3914SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3915 MachineFunction &MF = DAG.getMachineFunction();
3916 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3917 EVT PtrVT = getPointerTy(MF.getDataLayout());
3918
3919 SDLoc dl(Op);
3920
3921 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3922 // vastart just stores the address of the VarArgsFrameIndex slot into the
3923 // memory location argument.
3924 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3925 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3926 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3927 MachinePointerInfo(SV));
3928 }
3929
3930 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3931 // We suppose the given va_list is already allocated.
3932 //
3933 // typedef struct {
3934 // char gpr; /* index into the array of 8 GPRs
3935 // * stored in the register save area
3936 // * gpr=0 corresponds to r3,
3937 // * gpr=1 to r4, etc.
3938 // */
3939 // char fpr; /* index into the array of 8 FPRs
3940 // * stored in the register save area
3941 // * fpr=0 corresponds to f1,
3942 // * fpr=1 to f2, etc.
3943 // */
3944 // char *overflow_arg_area;
3945 // /* location on stack that holds
3946 // * the next overflow argument
3947 // */
3948 // char *reg_save_area;
3949 // /* where r3:r10 and f1:f8 (if saved)
3950 // * are stored
3951 // */
3952 // } va_list[1];
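// With this layout the fields sit at fixed byte offsets, which is what the
// constants in LowerVAARG and the 12-byte copy in LowerVACOPY rely on:
//   offset 0: gpr, offset 1: fpr, offsets 2-3: padding,
//   offset 4: overflow_arg_area, offset 8: reg_save_area (12 bytes total).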
3953
3954 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3955 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3956 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3957 PtrVT);
3958 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3959 PtrVT);
3960
3961 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3962 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3963
3964 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3965 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3966
3967 uint64_t FPROffset = 1;
3968 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3969
3970 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3971
3972 // Store first byte : number of int regs
3973 SDValue firstStore =
3974 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3975 MachinePointerInfo(SV), MVT::i8);
3976 uint64_t nextOffset = FPROffset;
3977 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3978 ConstFPROffset);
3979
3980 // Store second byte : number of float regs
3981 SDValue secondStore =
3982 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3983 MachinePointerInfo(SV, nextOffset), MVT::i8);
3984 nextOffset += StackOffset;
3985 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3986
3987 // Store second word : arguments given on stack
3988 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3989 MachinePointerInfo(SV, nextOffset));
3990 nextOffset += FrameOffset;
3991 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3992
3993 // Store third word : arguments given in registers
3994 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3995 MachinePointerInfo(SV, nextOffset));
3996}
3997
3998/// FPR - The set of FP registers that should be allocated for arguments
3999/// on Darwin and AIX.
4000static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4001 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4002 PPC::F11, PPC::F12, PPC::F13};
4003
4004/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4005/// the stack.
4006static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4007 unsigned PtrByteSize) {
4008 unsigned ArgSize = ArgVT.getStoreSize();
4009 if (Flags.isByVal())
4010 ArgSize = Flags.getByValSize();
4011
4012 // Round up to multiples of the pointer size, except for array members,
4013 // which are always packed.
4014 if (!Flags.isInConsecutiveRegs())
4015 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4016
4017 return ArgSize;
4018}
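// For example, with an 8-byte pointer a 20-byte byval aggregate that is not
// part of a consecutive-register array reserves a 24-byte slot, while a
// 4-byte member of such an array stays at 4 bytes.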
4019
4020/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4021/// on the stack.
4022 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4023 ISD::ArgFlagsTy Flags,
4024 unsigned PtrByteSize) {
4025 Align Alignment(PtrByteSize);
4026
4027 // Altivec parameters are padded to a 16 byte boundary.
4028 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4029 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4030 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4031 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4032 Alignment = Align(16);
4033
4034 // ByVal parameters are aligned as requested.
4035 if (Flags.isByVal()) {
4036 auto BVAlign = Flags.getNonZeroByValAlign();
4037 if (BVAlign > PtrByteSize) {
4038 if (BVAlign.value() % PtrByteSize != 0)
4039 llvm_unreachable(
4040 "ByVal alignment is not a multiple of the pointer size");
4041
4042 Alignment = BVAlign;
4043 }
4044 }
4045
4046 // Array members are always packed to their original alignment.
4047 if (Flags.isInConsecutiveRegs()) {
4048 // If the array member was split into multiple registers, the first
4049 // needs to be aligned to the size of the full type. (Except for
4050 // ppcf128, which is only aligned as its f64 components.)
4051 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4052 Alignment = Align(OrigVT.getStoreSize());
4053 else
4054 Alignment = Align(ArgVT.getStoreSize());
4055 }
4056
4057 return Alignment;
4058}
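// For example, a v4i32 element of an ELFv2 homogeneous vector aggregate gets
// 16-byte alignment, whereas a plain i64 argument only needs the 8-byte
// pointer alignment.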
4059
4060/// CalculateStackSlotUsed - Return whether this argument will use its
4061/// stack slot (instead of being passed in registers). ArgOffset,
4062/// AvailableFPRs, and AvailableVRs must hold the current argument
4063/// position, and will be updated to account for this argument.
4064static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4065 unsigned PtrByteSize, unsigned LinkageSize,
4066 unsigned ParamAreaSize, unsigned &ArgOffset,
4067 unsigned &AvailableFPRs,
4068 unsigned &AvailableVRs) {
4069 bool UseMemory = false;
4070
4071 // Respect alignment of argument on the stack.
4072 Align Alignment =
4073 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4074 ArgOffset = alignTo(ArgOffset, Alignment);
4075 // If there's no space left in the argument save area, we must
4076 // use memory (this check also catches zero-sized arguments).
4077 if (ArgOffset >= LinkageSize + ParamAreaSize)
4078 UseMemory = true;
4079
4080 // Allocate argument on the stack.
4081 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4082 if (Flags.isInConsecutiveRegsLast())
4083 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4084 // If we overran the argument save area, we must use memory
4085 // (this check catches arguments passed partially in memory)
4086 if (ArgOffset > LinkageSize + ParamAreaSize)
4087 UseMemory = true;
4088
4089 // However, if the argument is actually passed in an FPR or a VR,
4090 // we don't use memory after all.
4091 if (!Flags.isByVal()) {
4092 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4093 if (AvailableFPRs > 0) {
4094 --AvailableFPRs;
4095 return false;
4096 }
4097 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4098 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4099 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4100 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4101 if (AvailableVRs > 0) {
4102 --AvailableVRs;
4103 return false;
4104 }
4105 }
4106
4107 return UseMemory;
4108}
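// For example, with the 64-byte parameter save area used below (8 GPRs of
// 8 bytes each), a ninth integer argument starts at LinkageSize + 64 and is
// reported as needing memory, while an f64 at the same offset that still has
// a free FPR is not, even though both advance ArgOffset by 8 bytes.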
4109
4110/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4111/// ensure minimum alignment required for target.
4112 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4113 unsigned NumBytes) {
4114 return alignTo(NumBytes, Lowering->getStackAlign());
4115}
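// For example, with a 16-byte stack alignment a computed reserved area of
// 104 bytes is rounded up to 112.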
4116
4117SDValue PPCTargetLowering::LowerFormalArguments(
4118 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4119 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4120 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4121 if (Subtarget.isAIXABI())
4122 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4123 InVals);
4124 if (Subtarget.is64BitELFABI())
4125 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4126 InVals);
4127 assert(Subtarget.is32BitELFABI());
4128 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4129 InVals);
4130}
4131
4132SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4133 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4134 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4135 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4136
4137 // 32-bit SVR4 ABI Stack Frame Layout:
4138 // +-----------------------------------+
4139 // +--> | Back chain |
4140 // | +-----------------------------------+
4141 // | | Floating-point register save area |
4142 // | +-----------------------------------+
4143 // | | General register save area |
4144 // | +-----------------------------------+
4145 // | | CR save word |
4146 // | +-----------------------------------+
4147 // | | VRSAVE save word |
4148 // | +-----------------------------------+
4149 // | | Alignment padding |
4150 // | +-----------------------------------+
4151 // | | Vector register save area |
4152 // | +-----------------------------------+
4153 // | | Local variable space |
4154 // | +-----------------------------------+
4155 // | | Parameter list area |
4156 // | +-----------------------------------+
4157 // | | LR save word |
4158 // | +-----------------------------------+
4159 // SP--> +--- | Back chain |
4160 // +-----------------------------------+
4161 //
4162 // Specifications:
4163 // System V Application Binary Interface PowerPC Processor Supplement
4164 // AltiVec Technology Programming Interface Manual
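// (In this layout the back chain word is at SP+0 and the LR save word at
// SP+4, which is the 8-byte linkage area reserved below.)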
4165
4166 MachineFunction &MF = DAG.getMachineFunction();
4167 MachineFrameInfo &MFI = MF.getFrameInfo();
4168 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4169
4170 EVT PtrVT = getPointerTy(MF.getDataLayout());
4171 // Potential tail calls could cause overwriting of argument stack slots.
4172 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4173 (CallConv == CallingConv::Fast));
4174 const Align PtrAlign(4);
4175
4176 // Assign locations to all of the incoming arguments.
4177 SmallVector<CCValAssign, 16> ArgLocs;
4178 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4179 *DAG.getContext());
4180
4181 // Reserve space for the linkage area on the stack.
4182 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4183 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4184 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4185
4186 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4187 CCValAssign &VA = ArgLocs[i];
4188
4189 // Arguments stored in registers.
4190 if (VA.isRegLoc()) {
4191 const TargetRegisterClass *RC;
4192 EVT ValVT = VA.getValVT();
4193
4194 switch (ValVT.getSimpleVT().SimpleTy) {
4195 default:
4196 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4197 case MVT::i1:
4198 case MVT::i32:
4199 RC = &PPC::GPRCRegClass;
4200 break;
4201 case MVT::f32:
4202 if (Subtarget.hasP8Vector())
4203 RC = &PPC::VSSRCRegClass;
4204 else if (Subtarget.hasSPE())
4205 RC = &PPC::GPRCRegClass;
4206 else
4207 RC = &PPC::F4RCRegClass;
4208 break;
4209 case MVT::f64:
4210 if (Subtarget.hasVSX())
4211 RC = &PPC::VSFRCRegClass;
4212 else if (Subtarget.hasSPE())
4213 // SPE passes doubles in GPR pairs.
4214 RC = &PPC::GPRCRegClass;
4215 else
4216 RC = &PPC::F8RCRegClass;
4217 break;
4218 case MVT::v16i8:
4219 case MVT::v8i16:
4220 case MVT::v4i32:
4221 RC = &PPC::VRRCRegClass;
4222 break;
4223 case MVT::v4f32:
4224 RC = &PPC::VRRCRegClass;
4225 break;
4226 case MVT::v2f64:
4227 case MVT::v2i64:
4228 RC = &PPC::VRRCRegClass;
4229 break;
4230 }
4231
4232 SDValue ArgValue;
4233 // Transform the arguments stored in physical registers into
4234 // virtual ones.
4235 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4236 assert(i + 1 < e && "No second half of double precision argument");
4237 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4238 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4239 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4240 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4241 if (!Subtarget.isLittleEndian())
4242 std::swap (ArgValueLo, ArgValueHi);
4243 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4244 ArgValueHi);
4245 } else {
4246 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4247 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4248 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4249 if (ValVT == MVT::i1)
4250 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4251 }
4252
4253 InVals.push_back(ArgValue);
4254 } else {
4255 // Argument stored in memory.
4256 assert(VA.isMemLoc());
4257
4258 // Get the extended size of the argument type on the stack
4259 unsigned ArgSize = VA.getLocVT().getStoreSize();
4260 // Get the actual size of the argument type
4261 unsigned ObjSize = VA.getValVT().getStoreSize();
4262 unsigned ArgOffset = VA.getLocMemOffset();
4263 // Stack objects in PPC32 are right justified.
4264 ArgOffset += ArgSize - ObjSize;
4265 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4266
4267 // Create load nodes to retrieve arguments from the stack.
4268 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4269 InVals.push_back(
4270 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4271 }
4272 }
4273
4274 // Assign locations to all of the incoming aggregate by value arguments.
4275 // Aggregates passed by value are stored in the local variable space of the
4276 // caller's stack frame, right above the parameter list area.
4277 SmallVector<CCValAssign, 16> ByValArgLocs;
4278 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4279 ByValArgLocs, *DAG.getContext());
4280
4281 // Reserve stack space for the allocations in CCInfo.
4282 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4283
4284 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4285
4286 // Area that is at least reserved in the caller of this function.
4287 unsigned MinReservedArea = CCByValInfo.getStackSize();
4288 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4289
4290 // Set the size that is at least reserved in caller of this function. Tail
4291 // call optimized function's reserved stack space needs to be aligned so that
4292 // taking the difference between two stack areas will result in an aligned
4293 // stack.
4294 MinReservedArea =
4295 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4296 FuncInfo->setMinReservedArea(MinReservedArea);
4297
4298 SmallVector<SDValue, 8> MemOps;
4299
4300 // If the function takes variable number of arguments, make a frame index for
4301 // the start of the first vararg value... for expansion of llvm.va_start.
4302 if (isVarArg) {
4303 static const MCPhysReg GPArgRegs[] = {
4304 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4305 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4306 };
4307 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4308
4309 static const MCPhysReg FPArgRegs[] = {
4310 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4311 PPC::F8
4312 };
4313 unsigned NumFPArgRegs = std::size(FPArgRegs);
4314
4315 if (useSoftFloat() || hasSPE())
4316 NumFPArgRegs = 0;
4317
4318 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4319 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4320
4321 // Make room for NumGPArgRegs and NumFPArgRegs.
4322 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4323 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4324
4325 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4326 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4327
4328 FuncInfo->setVarArgsFrameIndex(
4329 MFI.CreateStackObject(Depth, Align(8), false));
4330 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4331
4332 // The fixed integer arguments of a variadic function are stored to the
4333 // VarArgsFrameIndex on the stack so that they may be loaded by
4334 // dereferencing the result of va_next.
4335 for (MCPhysReg GPArgReg : GPArgRegs) {
4336 // Get an existing live-in vreg, or add a new one.
4337 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4338 if (!VReg)
4339 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4340
4341 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4342 SDValue Store =
4343 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4344 MemOps.push_back(Store);
4345 // Increment the address by four for the next argument to store
4346 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4347 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4348 }
4349
4350 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4351 // is set.
4352 // The double arguments are stored to the VarArgsFrameIndex
4353 // on the stack.
4354 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4355 // Get an existing live-in vreg, or add a new one.
4356 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4357 if (!VReg)
4358 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4359
4360 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4361 SDValue Store =
4362 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4363 MemOps.push_back(Store);
4364 // Increment the address by eight for the next argument to store
4365 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4366 PtrVT);
4367 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4368 }
4369 }
4370
4371 if (!MemOps.empty())
4372 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4373
4374 return Chain;
4375}
4376
4377// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4378// value to MVT::i64 and then truncate to the correct register size.
4379SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4380 EVT ObjectVT, SelectionDAG &DAG,
4381 SDValue ArgVal,
4382 const SDLoc &dl) const {
4383 if (Flags.isSExt())
4384 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4385 DAG.getValueType(ObjectVT));
4386 else if (Flags.isZExt())
4387 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4388 DAG.getValueType(ObjectVT));
4389
4390 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4391}
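// For example, a sign-extended i32 argument arrives in the low half of an
// i64 GPR; the AssertSext node records that the upper 32 bits already hold a
// sign extension of the i32 value before it is truncated back to i32.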
4392
4393SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4394 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4395 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4396 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4397 // TODO: add description of PPC stack frame format, or at least some docs.
4398 //
4399 bool isELFv2ABI = Subtarget.isELFv2ABI();
4400 bool isLittleEndian = Subtarget.isLittleEndian();
4401 MachineFunction &MF = DAG.getMachineFunction();
4402 MachineFrameInfo &MFI = MF.getFrameInfo();
4403 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4404
4405 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4406 "fastcc not supported on varargs functions");
4407
4408 EVT PtrVT = getPointerTy(MF.getDataLayout());
4409 // Potential tail calls could cause overwriting of argument stack slots.
4410 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4411 (CallConv == CallingConv::Fast));
4412 unsigned PtrByteSize = 8;
4413 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4414
4415 static const MCPhysReg GPR[] = {
4416 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4417 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4418 };
4419 static const MCPhysReg VR[] = {
4420 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4421 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4422 };
4423
4424 const unsigned Num_GPR_Regs = std::size(GPR);
4425 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4426 const unsigned Num_VR_Regs = std::size(VR);
4427
4428 // Do a first pass over the arguments to determine whether the ABI
4429 // guarantees that our caller has allocated the parameter save area
4430 // on its stack frame. In the ELFv1 ABI, this is always the case;
4431 // in the ELFv2 ABI, it is true if this is a vararg function or if
4432 // any parameter is located in a stack slot.
4433
4434 bool HasParameterArea = !isELFv2ABI || isVarArg;
4435 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4436 unsigned NumBytes = LinkageSize;
4437 unsigned AvailableFPRs = Num_FPR_Regs;
4438 unsigned AvailableVRs = Num_VR_Regs;
4439 for (const ISD::InputArg &In : Ins) {
4440 if (In.Flags.isNest())
4441 continue;
4442
4443 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4444 LinkageSize, ParamAreaSize, NumBytes,
4445 AvailableFPRs, AvailableVRs))
4446 HasParameterArea = true;
4447 }
4448
4449 // Add DAG nodes to load the arguments or copy them out of registers. On
4450 // entry to a function on PPC, the arguments start after the linkage area,
4451 // although the first ones are often in registers.
4452
4453 unsigned ArgOffset = LinkageSize;
4454 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4455 SmallVector<SDValue, 8> MemOps;
4456 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4457 unsigned CurArgIdx = 0;
4458 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4459 SDValue ArgVal;
4460 bool needsLoad = false;
4461 EVT ObjectVT = Ins[ArgNo].VT;
4462 EVT OrigVT = Ins[ArgNo].ArgVT;
4463 unsigned ObjSize = ObjectVT.getStoreSize();
4464 unsigned ArgSize = ObjSize;
4465 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4466 if (Ins[ArgNo].isOrigArg()) {
4467 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4468 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4469 }
4470 // We re-align the argument offset for each argument, except when using the
4471 // fast calling convention, when we need to make sure we do that only when
4472 // we'll actually use a stack slot.
4473 unsigned CurArgOffset;
4474 Align Alignment;
4475 auto ComputeArgOffset = [&]() {
4476 /* Respect alignment of argument on the stack. */
4477 Alignment =
4478 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4479 ArgOffset = alignTo(ArgOffset, Alignment);
4480 CurArgOffset = ArgOffset;
4481 };
4482
4483 if (CallConv != CallingConv::Fast) {
4484 ComputeArgOffset();
4485
4486 /* Compute GPR index associated with argument offset. */
4487 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4488 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4489 }
4490
4491 // FIXME the codegen can be much improved in some cases.
4492 // We do not have to keep everything in memory.
4493 if (Flags.isByVal()) {
4494 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4495
4496 if (CallConv == CallingConv::Fast)
4497 ComputeArgOffset();
4498
4499 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
4500 ObjSize = Flags.getByValSize();
4501 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4502 // Empty aggregate parameters do not take up registers. Examples:
4503 // struct { } a;
4504 // union { } b;
4505 // int c[0];
4506 // etc. However, we have to provide a place-holder in InVals, so
4507 // pretend we have an 8-byte item at the current address for that
4508 // purpose.
4509 if (!ObjSize) {
4510 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4511 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4512 InVals.push_back(FIN);
4513 continue;
4514 }
4515
4516 // Create a stack object covering all stack doublewords occupied
4517 // by the argument. If the argument is (fully or partially) on
4518 // the stack, or if the argument is fully in registers but the
4519 // caller has allocated the parameter save anyway, we can refer
4520 // directly to the caller's stack frame. Otherwise, create a
4521 // local copy in our own frame.
4522 int FI;
4523 if (HasParameterArea ||
4524 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4525 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4526 else
4527 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4528 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4529
4530 // Handle aggregates smaller than 8 bytes.
4531 if (ObjSize < PtrByteSize) {
4532 // The value of the object is its address, which differs from the
4533 // address of the enclosing doubleword on big-endian systems.
4534 SDValue Arg = FIN;
4535 if (!isLittleEndian) {
4536 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4537 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4538 }
4539 InVals.push_back(Arg);
4540
4541 if (GPR_idx != Num_GPR_Regs) {
4542 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4543 FuncInfo->addLiveInAttr(VReg, Flags);
4544 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4545 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4546 SDValue Store =
4547 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4548 MachinePointerInfo(&*FuncArg), ObjType);
4549 MemOps.push_back(Store);
4550 }
4551 // Whether we copied from a register or not, advance the offset
4552 // into the parameter save area by a full doubleword.
4553 ArgOffset += PtrByteSize;
4554 continue;
4555 }
4556
4557 // The value of the object is its address, which is the address of
4558 // its first stack doubleword.
4559 InVals.push_back(FIN);
4560
4561 // Store whatever pieces of the object are in registers to memory.
4562 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4563 if (GPR_idx == Num_GPR_Regs)
4564 break;
4565
4566 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4567 FuncInfo->addLiveInAttr(VReg, Flags);
4568 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4569 SDValue Addr = FIN;
4570 if (j) {
4571 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4572 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4573 }
4574 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4575 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4576 SDValue Store =
4577 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4578 MachinePointerInfo(&*FuncArg, j), ObjType);
4579 MemOps.push_back(Store);
4580 ++GPR_idx;
4581 }
4582 ArgOffset += ArgSize;
4583 continue;
4584 }
4585
4586 switch (ObjectVT.getSimpleVT().SimpleTy) {
4587 default: llvm_unreachable("Unhandled argument type!");
4588 case MVT::i1:
4589 case MVT::i32:
4590 case MVT::i64:
4591 if (Flags.isNest()) {
4592 // The 'nest' parameter, if any, is passed in R11.
4593 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4594 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4595
4596 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4597 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4598
4599 break;
4600 }
4601
4602 // These can be scalar arguments or elements of an integer array type
4603 // passed directly. Clang may use those instead of "byval" aggregate
4604 // types to avoid forcing arguments to memory unnecessarily.
4605 if (GPR_idx != Num_GPR_Regs) {
4606 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4607 FuncInfo->addLiveInAttr(VReg, Flags);
4608 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4609
4610 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4611 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4612 // value to MVT::i64 and then truncate to the correct register size.
4613 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4614 } else {
4615 if (CallConv == CallingConv::Fast)
4616 ComputeArgOffset();
4617
4618 needsLoad = true;
4619 ArgSize = PtrByteSize;
4620 }
4621 if (CallConv != CallingConv::Fast || needsLoad)
4622 ArgOffset += 8;
4623 break;
4624
4625 case MVT::f32:
4626 case MVT::f64:
4627 // These can be scalar arguments or elements of a float array type
4628 // passed directly. The latter are used to implement ELFv2 homogenous
4629 // float aggregates.
4630 if (FPR_idx != Num_FPR_Regs) {
4631 unsigned VReg;
4632
4633 if (ObjectVT == MVT::f32)
4634 VReg = MF.addLiveIn(FPR[FPR_idx],
4635 Subtarget.hasP8Vector()
4636 ? &PPC::VSSRCRegClass
4637 : &PPC::F4RCRegClass);
4638 else
4639 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4640 ? &PPC::VSFRCRegClass
4641 : &PPC::F8RCRegClass);
4642
4643 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4644 ++FPR_idx;
4645 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4646 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4647 // once we support fp <-> gpr moves.
4648
4649 // This can only ever happen in the presence of f32 array types,
4650 // since otherwise we never run out of FPRs before running out
4651 // of GPRs.
4652 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4653 FuncInfo->addLiveInAttr(VReg, Flags);
4654 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4655
4656 if (ObjectVT == MVT::f32) {
4657 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4658 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4659 DAG.getConstant(32, dl, MVT::i32));
4660 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4661 }
4662
4663 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4664 } else {
4665 if (CallConv == CallingConv::Fast)
4666 ComputeArgOffset();
4667
4668 needsLoad = true;
4669 }
4670
4671 // When passing an array of floats, the array occupies consecutive
4672 // space in the argument area; only round up to the next doubleword
4673 // at the end of the array. Otherwise, each float takes 8 bytes.
4674 if (CallConv != CallingConv::Fast || needsLoad) {
4675 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4676 ArgOffset += ArgSize;
4677 if (Flags.isInConsecutiveRegsLast())
4678 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4679 }
4680 break;
4681 case MVT::v4f32:
4682 case MVT::v4i32:
4683 case MVT::v8i16:
4684 case MVT::v16i8:
4685 case MVT::v2f64:
4686 case MVT::v2i64:
4687 case MVT::v1i128:
4688 case MVT::f128:
4689 // These can be scalar arguments or elements of a vector array type
4690 // passed directly. The latter are used to implement ELFv2 homogenous
4691 // vector aggregates.
4692 if (VR_idx != Num_VR_Regs) {
4693 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4694 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4695 ++VR_idx;
4696 } else {
4697 if (CallConv == CallingConv::Fast)
4698 ComputeArgOffset();
4699 needsLoad = true;
4700 }
4701 if (CallConv != CallingConv::Fast || needsLoad)
4702 ArgOffset += 16;
4703 break;
4704 }
4705
4706 // We need to load the argument to a virtual register if we determined
4707 // above that we ran out of physical registers of the appropriate type.
4708 if (needsLoad) {
4709 if (ObjSize < ArgSize && !isLittleEndian)
4710 CurArgOffset += ArgSize - ObjSize;
4711 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4712 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4713 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4714 }
4715
4716 InVals.push_back(ArgVal);
4717 }
4718
4719 // Area that is at least reserved in the caller of this function.
4720 unsigned MinReservedArea;
4721 if (HasParameterArea)
4722 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4723 else
4724 MinReservedArea = LinkageSize;
4725
4726 // Set the size that is at least reserved in caller of this function. Tail
4727 // call optimized functions' reserved stack space needs to be aligned so that
4728 // taking the difference between two stack areas will result in an aligned
4729 // stack.
4730 MinReservedArea =
4731 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4732 FuncInfo->setMinReservedArea(MinReservedArea);
4733
4734 // If the function takes variable number of arguments, make a frame index for
4735 // the start of the first vararg value... for expansion of llvm.va_start.
4736 // The ELFv2 ABI spec states:
4737 // C programs that are intended to be *portable* across different compilers
4738 // and architectures must use the header file <stdarg.h> to deal with variable
4739 // argument lists.
4740 if (isVarArg && MFI.hasVAStart()) {
4741 int Depth = ArgOffset;
4742
4743 FuncInfo->setVarArgsFrameIndex(
4744 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4745 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4746
4747 // If this function is vararg, store any remaining integer argument regs
4748 // to their spots on the stack so that they may be loaded by dereferencing
4749 // the result of va_next.
4750 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4751 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4752 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4753 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4754 SDValue Store =
4755 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4756 MemOps.push_back(Store);
4757 // Increment the address by the pointer size for the next argument to store
4758 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4759 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4760 }
4761 }
4762
4763 if (!MemOps.empty())
4764 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4765
4766 return Chain;
4767}
4768
4769/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4770/// adjusted to accommodate the arguments for the tailcall.
4771static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4772 unsigned ParamSize) {
4773
4774 if (!isTailCall) return 0;
4775
4776 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4777 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4778 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4779 // Remember only if the new adjustment is bigger.
4780 if (SPDiff < FI->getTailCallSPDelta())
4781 FI->setTailCallSPDelta(SPDiff);
4782
4783 return SPDiff;
4784}
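// For example, if the caller reserved a 64-byte minimum area but the tail
// callee needs 96 bytes of parameter space, SPDiff is -32; the most negative
// delta seen so far is recorded so the frame can be adjusted accordingly.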
4785
4786static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4787
4788static bool callsShareTOCBase(const Function *Caller,
4789 const GlobalValue *CalleeGV,
4790 const TargetMachine &TM) {
4791 // It does not make sense to call callsShareTOCBase() with a caller that
4792 // is PC Relative since PC Relative callers do not have a TOC.
4793#ifndef NDEBUG
4794 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4795 assert(!STICaller->isUsingPCRelativeCalls() &&
4796 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4797#endif
4798
4799 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4800 // don't have enough information to determine if the caller and callee share
4801 // the same TOC base, so we have to pessimistically assume they don't for
4802 // correctness.
4803 if (!CalleeGV)
4804 return false;
4805
4806 // If the callee is preemptable, then the static linker will use a plt-stub
4807 // which saves the toc to the stack, and needs a nop after the call
4808 // instruction to convert to a toc-restore.
4809 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4810 return false;
4811
4812 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4813 // We may need a TOC restore in the situation where the caller requires a
4814 // valid TOC but the callee is PC Relative and does not.
4815 const Function *F = dyn_cast<Function>(CalleeGV);
4816 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4817
4818 // If we have an Alias we can try to get the function from there.
4819 if (Alias) {
4820 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4821 F = dyn_cast<Function>(GlobalObj);
4822 }
4823
4824 // If we still have no valid function pointer we do not have enough
4825 // information to determine if the callee uses PC Relative calls so we must
4826 // assume that it does.
4827 if (!F)
4828 return false;
4829
4830 // If the callee uses PC Relative we cannot guarantee that the callee won't
4831 // clobber the TOC of the caller and so we must assume that the two
4832 // functions do not share a TOC base.
4833 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4834 if (STICallee->isUsingPCRelativeCalls())
4835 return false;
4836
4837 // If the GV is not a strong definition then we need to assume it can be
4838 // replaced by another function at link time. The function that replaces
4839 // it may not share the same TOC as the caller since the callee may be
4840 // replaced by a PC Relative version of the same function.
4841 if (!CalleeGV->isStrongDefinitionForLinker())
4842 return false;
4843
4844 // The medium and large code models are expected to provide a sufficiently
4845 // large TOC to satisfy the data addressing needs of a module with a
4846 // single TOC.
4847 if (CodeModel::Medium == TM.getCodeModel() ||
4848 CodeModel::Large == TM.getCodeModel())
4849 return true;
4850
4851 // Any explicitly-specified sections and section prefixes must also match.
4852 // Also, if we're using -ffunction-sections, then each function is always in
4853 // a different section (the same is true for COMDAT functions).
4854 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4855 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4856 return false;
4857 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4858 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4859 return false;
4860 }
4861
4862 return true;
4863}
4864
4865static bool
4866 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4867 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4868 assert(Subtarget.is64BitELFABI());
4869
4870 const unsigned PtrByteSize = 8;
4871 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4872
4873 static const MCPhysReg GPR[] = {
4874 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4875 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4876 };
4877 static const MCPhysReg VR[] = {
4878 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4879 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4880 };
4881
4882 const unsigned NumGPRs = std::size(GPR);
4883 const unsigned NumFPRs = 13;
4884 const unsigned NumVRs = std::size(VR);
4885 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4886
4887 unsigned NumBytes = LinkageSize;
4888 unsigned AvailableFPRs = NumFPRs;
4889 unsigned AvailableVRs = NumVRs;
4890
4891 for (const ISD::OutputArg& Param : Outs) {
4892 if (Param.Flags.isNest()) continue;
4893
4894 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4895 LinkageSize, ParamAreaSize, NumBytes,
4896 AvailableFPRs, AvailableVRs))
4897 return true;
4898 }
4899 return false;
4900}
4901
4902static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4903 if (CB.arg_size() != CallerFn->arg_size())
4904 return false;
4905
4906 auto CalleeArgIter = CB.arg_begin();
4907 auto CalleeArgEnd = CB.arg_end();
4908 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4909
4910 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4911 const Value* CalleeArg = *CalleeArgIter;
4912 const Value* CallerArg = &(*CallerArgIter);
4913 if (CalleeArg == CallerArg)
4914 continue;
4915
4916 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4917 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4918 // }
4919 // 1st argument of callee is undef and has the same type as caller.
4920 if (CalleeArg->getType() == CallerArg->getType() &&
4921 isa<UndefValue>(CalleeArg))
4922 continue;
4923
4924 return false;
4925 }
4926
4927 return true;
4928}
4929
4930// Returns true if TCO is possible between the callers and callees
4931// calling conventions.
4932static bool
4933 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4934 CallingConv::ID CalleeCC) {
4935 // Tail calls are possible with fastcc and ccc.
4936 auto isTailCallableCC = [] (CallingConv::ID CC){
4937 return CC == CallingConv::C || CC == CallingConv::Fast;
4938 };
4939 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4940 return false;
4941
4942 // We can safely tail call both fastcc and ccc callees from a c calling
4943 // convention caller. If the caller is fastcc, we may have less stack space
4944 // than a non-fastcc caller with the same signature so disable tail-calls in
4945 // that case.
4946 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4947}
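// For example, a C caller may tail call either a C or a fastcc callee, but a
// fastcc caller may only tail call another fastcc callee, since its own frame
// may reserve less argument space than a C caller would.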
4948
4949bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4950 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
4951 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
4952 const SmallVectorImpl<ISD::OutputArg> &Outs,
4953 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
4954 bool isCalleeExternalSymbol) const {
4955 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4956
4957 if (DisableSCO && !TailCallOpt) return false;
4958
4959 // Variadic argument functions are not supported.
4960 if (isVarArg) return false;
4961
4962 // Check that the calling conventions are compatible for tco.
4963 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
4964 return false;
4965
4966 // A caller that has any byval parameter is not supported.
4967 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4968 return false;
4969
4970 // Likewise, a callee that has any byval parameter is not supported.
4971 // Note: This is a quick workaround, because in some cases, e.g.
4972 // caller's stack size > callee's stack size, we are still able to apply
4973 // sibling call optimization. For example, gcc is able to do SCO for caller1
4974 // in the following example, but not for caller2.
4975 // struct test {
4976 // long int a;
4977 // char ary[56];
4978 // } gTest;
4979 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4980 // b->a = v.a;
4981 // return 0;
4982 // }
4983 // void caller1(struct test a, struct test c, struct test *b) {
4984 // callee(gTest, b); }
4985 // void caller2(struct test *b) { callee(gTest, b); }
4986 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4987 return false;
4988
4989 // If callee and caller use different calling conventions, we cannot pass
4990 // parameters on stack since offsets for the parameter area may be different.
4991 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
4992 return false;
4993
4994 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4995 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4996 // callee potentially have different TOC bases then we cannot tail call since
4997 // we need to restore the TOC pointer after the call.
4998 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4999 // We cannot guarantee this for indirect calls or calls to external functions.
5000 // When PC-Relative addressing is used, the concept of the TOC is no longer
5001 // applicable so this check is not required.
5002 // Check first for indirect calls.
5003 if (!Subtarget.isUsingPCRelativeCalls() &&
5004 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5005 return false;
5006
5007 // Check if we share the TOC base.
5008 if (!Subtarget.isUsingPCRelativeCalls() &&
5009 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5010 return false;
5011
5012 // TCO allows altering callee ABI, so we don't have to check further.
5013 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5014 return true;
5015
5016 if (DisableSCO) return false;
5017
5018 // If the callee uses the same argument list as the caller, we can apply
5019 // SCO in this case. Otherwise, we need to check whether the callee needs
5020 // stack space for passing arguments.
5021 // PC Relative tail calls may not have a CallBase.
5022 // If there is no CallBase we cannot verify if we have the same argument
5023 // list so assume that we don't have the same argument list.
5024 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5025 needStackSlotPassParameters(Subtarget, Outs))
5026 return false;
5027 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5028 return false;
5029
5030 return true;
5031}
5032
5033/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5034/// for tail call optimization. Targets which want to do tail call
5035/// optimization should implement this function.
5036bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5037 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5038 CallingConv::ID CallerCC, bool isVarArg,
5039 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5040 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5041 return false;
5042
5043 // Variable argument functions are not supported.
5044 if (isVarArg)
5045 return false;
5046
5047 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5048 // Functions containing by val parameters are not supported.
5049 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5050 return false;
5051
5052 // Non-PIC/GOT tail calls are supported.
5053 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5054 return true;
5055
5056 // At the moment we can only do local tail calls (in same module, hidden
5057 // or protected) if we are generating PIC.
5058 if (CalleeGV)
5059 return CalleeGV->hasHiddenVisibility() ||
5060 CalleeGV->hasProtectedVisibility();
5061 }
5062
5063 return false;
5064}
5065
5066/// isCallCompatibleAddress - Return the immediate to use if the specified
5067/// 32-bit value is representable in the immediate field of a BxA instruction.
5068 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5069 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5070 if (!C) return nullptr;
5071
5072 int Addr = C->getZExtValue();
5073 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5074 SignExtend32<26>(Addr) != Addr)
5075 return nullptr; // Top 6 bits have to be sext of immediate.
5076
5077 return DAG
5078 .getConstant(
5079 (int)C->getZExtValue() >> 2, SDLoc(Op),
5080 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5081 .getNode();
5082}
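// For example, an absolute target of 0x100 passes both checks and is encoded
// as the immediate 0x40 (the address shifted right by two), while 0x102 is
// rejected because its low two bits are not zero.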
5083
5084namespace {
5085
5086struct TailCallArgumentInfo {
5087 SDValue Arg;
5088 SDValue FrameIdxOp;
5089 int FrameIdx = 0;
5090
5091 TailCallArgumentInfo() = default;
5092};
5093
5094} // end anonymous namespace
5095
5096/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5097 static void StoreTailCallArgumentsToStackSlot(
5098 SelectionDAG &DAG, SDValue Chain,
5099 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5100 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5101 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5102 SDValue Arg = TailCallArgs[i].Arg;
5103 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5104 int FI = TailCallArgs[i].FrameIdx;
5105 // Store relative to framepointer.
5106 MemOpChains.push_back(DAG.getStore(
5107 Chain, dl, Arg, FIN,
5108 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5109 }
5110}
5111
5112/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5113/// the appropriate stack slot for the tail call optimized function call.
5114 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5115 SDValue OldRetAddr, SDValue OldFP,
5116 int SPDiff, const SDLoc &dl) {
5117 if (SPDiff) {
5118 // Calculate the new stack slot for the return address.
5119 MachineFunction &MF = DAG.getMachineFunction();
5120 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5121 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5122 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5123 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5124 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5125 NewRetAddrLoc, true);
5126 SDValue NewRetAddrFrIdx =
5127 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5128 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5129 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5130 }
5131 return Chain;
5132}
5133
5134/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5135/// the position of the argument.
5136 static void CalculateTailCallArgDest(
5137 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5138 int SPDiff, unsigned ArgOffset,
5139 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5140 int Offset = ArgOffset + SPDiff;
5141 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5142 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5143 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5144 SDValue FIN = DAG.getFrameIndex(FI, VT);
5145 TailCallArgumentInfo Info;
5146 Info.Arg = Arg;
5147 Info.FrameIdxOp = FIN;
5148 Info.FrameIdx = FI;
5149 TailCallArguments.push_back(Info);
5150}
5151
5152 /// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return address
5153/// stack slot. Returns the chain as result and the loaded frame pointers in
5154/// LROpOut/FPOpout. Used when tail calling.
5155SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5156 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5157 SDValue &FPOpOut, const SDLoc &dl) const {
5158 if (SPDiff) {
5159 // Load the LR and FP stack slot for later adjusting.
5160 LROpOut = getReturnAddrFrameIndex(DAG);
5161 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5162 MachinePointerInfo());
5163 Chain = SDValue(LROpOut.getNode(), 1);
5164 }
5165 return Chain;
5166}
5167
5168/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5169/// by "Src" to address "Dst" of size "Size". Alignment information is
5170/// specified by the specific parameter attribute. The copy will be passed as
5171/// a byval function parameter.
5172/// Sometimes what we are copying is the end of a larger object, the part that
5173/// does not fit in registers.
5174 static SDValue CreateCopyOfByValArgument(SDValue Dst, SDValue Src,
5175 SDValue Chain, ISD::ArgFlagsTy Flags,
5176 SelectionDAG &DAG, const SDLoc &dl) {
5177 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5178 return DAG.getMemcpy(
5179 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5180 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5181}
5182
5183/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5184/// tail calls.
5185 static void LowerMemOpCallTo(
5186 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5187 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5188 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5189 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5190 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5191 if (!isTailCall) {
5192 if (isVector) {
5193 SDValue StackPtr;
5194 if (isPPC64)
5195 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5196 else
5197 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5198 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5199 DAG.getConstant(ArgOffset, dl, PtrVT));
5200 }
5201 MemOpChains.push_back(
5202 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5203 // Calculate and remember argument location.
5204 } else
5205 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5206 TailCallArguments);
5207}
5208
5209static void
5210 PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5211 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5212 SDValue FPOp,
5213 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5214 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5215 // might overwrite each other in case of tail call optimization.
5216 SmallVector<SDValue, 8> MemOpChains2;
5217 // Do not flag preceding copytoreg stuff together with the following stuff.
5218 InGlue = SDValue();
5219 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5220 MemOpChains2, dl);
5221 if (!MemOpChains2.empty())
5222 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5223
5224 // Store the return address to the appropriate stack slot.
5225 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5226
5227 // Emit callseq_end just before tailcall node.
5228 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5229 InGlue = Chain.getValue(1);
5230}
5231
5232 // Is this global address that of a function that can be called by name (as
5233 // opposed to something that must hold a descriptor for an indirect call)?
5234static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5235 if (GV) {
5236 if (GV->isThreadLocal())
5237 return false;
5238
5239 return GV->getValueType()->isFunctionTy();
5240 }
5241
5242 return false;
5243}
5244
5245SDValue PPCTargetLowering::LowerCallResult(
5246 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5247 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5248 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5249  SmallVector<CCValAssign, 16> RVLocs;
5250 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5251 *DAG.getContext());
5252
5253 CCRetInfo.AnalyzeCallResult(
5254 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5255          ? RetCC_PPC_Cold
5256 : RetCC_PPC);
5257
5258 // Copy all of the result registers out of their specified physreg.
5259 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5260 CCValAssign &VA = RVLocs[i];
5261 assert(VA.isRegLoc() && "Can only return in registers!");
5262
5263 SDValue Val;
5264
5265 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5266 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5267 InGlue);
5268 Chain = Lo.getValue(1);
5269 InGlue = Lo.getValue(2);
5270 VA = RVLocs[++i]; // skip ahead to next loc
5271 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5272 InGlue);
5273 Chain = Hi.getValue(1);
5274 InGlue = Hi.getValue(2);
5275 if (!Subtarget.isLittleEndian())
5276 std::swap (Lo, Hi);
5277 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5278 } else {
5279 Val = DAG.getCopyFromReg(Chain, dl,
5280 VA.getLocReg(), VA.getLocVT(), InGlue);
5281 Chain = Val.getValue(1);
5282 InGlue = Val.getValue(2);
5283 }
5284
5285 switch (VA.getLocInfo()) {
5286 default: llvm_unreachable("Unknown loc info!");
5287 case CCValAssign::Full: break;
5288 case CCValAssign::AExt:
5289 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5290 break;
5291 case CCValAssign::ZExt:
5292 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5293 DAG.getValueType(VA.getValVT()));
5294 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5295 break;
5296 case CCValAssign::SExt:
5297 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5298 DAG.getValueType(VA.getValVT()));
5299 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5300 break;
5301 }
5302
5303 InVals.push_back(Val);
5304 }
5305
5306 return Chain;
5307}
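
// Illustrative sketch of the SPE return-value path above, which rebuilds an
// f64 from two 32-bit register copies and swaps them on big-endian targets.
// The helper below is hypothetical; it assumes BUILD_SPE64 treats its first
// operand as the low word of the 64-bit bit pattern.
#include <cstdint>
#include <utility>

namespace spe_sketch {
inline uint64_t buildSPE64Bits(uint32_t FirstCopy, uint32_t SecondCopy,
                               bool TargetIsLittleEndian) {
  uint32_t Lo = FirstCopy, Hi = SecondCopy;
  if (!TargetIsLittleEndian)
    std::swap(Lo, Hi); // mirrors the std::swap(Lo, Hi) in LowerCallResult
  return (uint64_t(Hi) << 32) | Lo;
}
} // namespace spe_sketch
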
5308
5309static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5310 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5311 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5312 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5313
5314 // PatchPoint calls are not indirect.
5315 if (isPatchPoint)
5316 return false;
5317
5318  if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5319 return false;
5320
5321 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot,
5322 // because the immediate function pointer points to a descriptor instead of
5323 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5324 // pointer immediate points to the global entry point, while the BLA would
5325 // need to jump to the local entry point (see rL211174).
5326 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5327 isBLACompatibleAddress(Callee, DAG))
5328 return false;
5329
5330 return true;
5331}
5332
5333// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5334static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5335 return Subtarget.isAIXABI() ||
5336 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5337}
5338
5339static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5340 const Function &Caller, const SDValue &Callee,
5341 const PPCSubtarget &Subtarget,
5342 const TargetMachine &TM,
5343 bool IsStrictFPCall = false) {
5344 if (CFlags.IsTailCall)
5345 return PPCISD::TC_RETURN;
5346
5347 unsigned RetOpc = 0;
5348 // This is a call through a function pointer.
5349 if (CFlags.IsIndirect) {
5350 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5351 // indirect calls. The save of the caller's TOC pointer to the stack will be
5352 // inserted into the DAG as part of call lowering. The restore of the TOC
5353 // pointer is modeled by using a pseudo instruction for the call opcode that
5354 // represents the 2 instruction sequence of an indirect branch and link,
5355 // immediately followed by a load of the TOC pointer from the stack save
5356 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5357 // as it is not saved or used.
5358 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5359 : PPCISD::BCTRL;
5360 } else if (Subtarget.isUsingPCRelativeCalls()) {
5361 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5362 RetOpc = PPCISD::CALL_NOTOC;
5363 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5364 // The ABIs that maintain a TOC pointer across calls need to have a nop
5365 // immediately following the call instruction if the caller and callee may
5366 // have different TOC bases. At link time if the linker determines the calls
5367 // may not share a TOC base, the call is redirected to a trampoline inserted
5368 // by the linker. The trampoline will (among other things) save the caller's
5369 // TOC pointer at an ABI designated offset in the linkage area and the
5370 // linker will rewrite the nop to be a load of the TOC pointer from the
5371 // linkage area into gpr2.
5372 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5373 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5374 RetOpc =
5375 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5376 } else
5377 RetOpc = PPCISD::CALL;
5378 if (IsStrictFPCall) {
5379 switch (RetOpc) {
5380 default:
5381 llvm_unreachable("Unknown call opcode");
5382 case PPCISD::BCTRL_LOAD_TOC:
5383 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5384 break;
5385 case PPCISD::BCTRL:
5386 RetOpc = PPCISD::BCTRL_RM;
5387 break;
5388 case PPCISD::CALL_NOTOC:
5389 RetOpc = PPCISD::CALL_NOTOC_RM;
5390 break;
5391 case PPCISD::CALL:
5392 RetOpc = PPCISD::CALL_RM;
5393 break;
5394 case PPCISD::CALL_NOP:
5395 RetOpc = PPCISD::CALL_NOP_RM;
5396 break;
5397 }
5398 }
5399 return RetOpc;
5400}
5401
5402static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5403 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5404 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5405 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5406 return SDValue(Dest, 0);
5407
5408 // Returns true if the callee is local, and false otherwise.
5409 auto isLocalCallee = [&]() {
5410    const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5411 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5412
5413 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5414           !isa_and_nonnull<GlobalIFunc>(GV);
5415 };
5416
5417 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5418 // a static relocation model causes some versions of GNU LD (2.17.50, at
5419 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5420 // built with secure-PLT.
5421 bool UsePlt =
5422 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5423      Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5424
5425 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5426 const TargetMachine &TM = Subtarget.getTargetMachine();
5427    const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5428 auto *S =
5429 static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5430
5431    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5432 return DAG.getMCSymbol(S, PtrVT);
5433 };
5434
5435 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5436 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5437 if (isFunctionGlobalAddress(GV)) {
5438 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5439
5440 if (Subtarget.isAIXABI()) {
5441 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5442 return getAIXFuncEntryPointSymbolSDNode(GV);
5443 }
5444 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5445 UsePlt ? PPCII::MO_PLT : 0);
5446 }
5447
5448  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5449 const char *SymName = S->getSymbol();
5450 if (Subtarget.isAIXABI()) {
5451 // If there exists a user-declared function whose name is the same as the
5452 // ExternalSymbol's, then we pick up the user-declared version.
5453      const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5454 if (const Function *F =
5455 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5456 return getAIXFuncEntryPointSymbolSDNode(F);
5457
5458 // On AIX, direct function calls reference the symbol for the function's
5459 // entry point, which is named by prepending a "." before the function's
5460 // C-linkage name. A Qualname is returned here because an external
5461 // function entry point is a csect with XTY_ER property.
5462 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5463 auto &Context = DAG.getMachineFunction().getContext();
5464 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5465 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5466          XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5467 return Sec->getQualNameSymbol();
5468 };
5469
5470 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5471 }
5472 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5473 UsePlt ? PPCII::MO_PLT : 0);
5474 }
5475
5476 // No transformation needed.
5477 assert(Callee.getNode() && "What no callee?");
5478 return Callee;
5479}
5480
5481static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5482 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5483 "Expected a CALLSEQ_STARTSDNode.");
5484
5485 // The last result value is the chain, except when the node has glue. If the
5486 // node has glue, then the last value is the glue and the chain is the
5487 // second-to-last value.
5488 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5489 if (LastValue.getValueType() != MVT::Glue)
5490 return LastValue;
5491
5492 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5493}
5494
5495 // Creates the node that moves a function's address into the count register
5496// to prepare for an indirect call instruction.
5497static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5498 SDValue &Glue, SDValue &Chain,
5499 const SDLoc &dl) {
5500 SDValue MTCTROps[] = {Chain, Callee, Glue};
5501 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5502 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5503 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5504 // The glue is the second value produced.
5505 Glue = Chain.getValue(1);
5506}
5507
5508static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5509 SDValue &Glue, SDValue &Chain,
5510 SDValue CallSeqStart,
5511 const CallBase *CB, const SDLoc &dl,
5512 bool hasNest,
5513 const PPCSubtarget &Subtarget) {
5514 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5515 // entry point, but to the function descriptor (the function entry point
5516 // address is part of the function descriptor though).
5517 // The function descriptor is a three doubleword structure with the
5518 // following fields: function entry point, TOC base address and
5519 // environment pointer.
5520 // Thus for a call through a function pointer, the following actions need
5521 // to be performed:
5522 // 1. Save the TOC of the caller in the TOC save area of its stack
5523 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5524 // 2. Load the address of the function entry point from the function
5525 // descriptor.
5526 // 3. Load the TOC of the callee from the function descriptor into r2.
5527 // 4. Load the environment pointer from the function descriptor into
5528 // r11.
5529 // 5. Branch to the function entry point address.
5530 // 6. On return of the callee, the TOC of the caller needs to be
5531 // restored (this is done in FinishCall()).
5532 //
5533 // The loads are scheduled at the beginning of the call sequence, and the
5534 // register copies are flagged together to ensure that no other
5535 // operations can be scheduled in between. E.g. without flagging the
5536 // copies together, a TOC access in the caller could be scheduled between
5537 // the assignment of the callee TOC and the branch to the callee, which leads
5538 // to incorrect code.
5539
5540 // Start by loading the function address from the descriptor.
5541 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5542 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5543                      ? (MachineMemOperand::MODereferenceable |
5544                         MachineMemOperand::MOInvariant)
5545                      : MachineMemOperand::MONone;
5546
5547 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5548
5549 // Registers used in building the DAG.
5550 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5551 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5552
5553 // Offsets of descriptor members.
5554 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5555 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5556
5557 const MVT RegVT = Subtarget.getScalarIntVT();
5558 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5559
5560 // One load for the functions entry point address.
5561 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5562 Alignment, MMOFlags);
5563
5564 // One for loading the TOC anchor for the module that contains the called
5565 // function.
5566 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5567 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5568 SDValue TOCPtr =
5569 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5570 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5571
5572 // One for loading the environment pointer.
5573 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5574 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5575 SDValue LoadEnvPtr =
5576 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5577 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5578
5579
5580 // Then copy the newly loaded TOC anchor to the TOC pointer.
5581 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5582 Chain = TOCVal.getValue(0);
5583 Glue = TOCVal.getValue(1);
5584
5585 // If the function call has an explicit 'nest' parameter, it takes the
5586 // place of the environment pointer.
5587 assert((!hasNest || !Subtarget.isAIXABI()) &&
5588 "Nest parameter is not supported on AIX.");
5589 if (!hasNest) {
5590 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5591 Chain = EnvVal.getValue(0);
5592 Glue = EnvVal.getValue(1);
5593 }
5594
5595 // The rest of the indirect call sequence is the same as the non-descriptor
5596 // DAG.
5597 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5598}
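
// A standalone sketch of the function-descriptor layout walked above. Under
// the descriptor-based 64-bit ABIs, a "function pointer" addresses a three
// doubleword descriptor rather than code, so an indirect call loads the entry
// point, TOC anchor, and environment pointer before branching. The struct and
// helper are illustrative only; the real offsets come from the PPCSubtarget
// queries above (descriptorTOCAnchorOffset / descriptorEnvironmentPointerOffset).
#include <cstdint>

namespace descriptor_sketch {
struct FunctionDescriptor {
  uint64_t EntryPoint; // offset 0: address moved into CTR and branched to
  uint64_t TOCAnchor;  // offset 8: callee's TOC base, copied into gpr2
  uint64_t EnvPointer; // offset 16: environment pointer, copied into gpr11
};

// Conceptual model of steps 2-5 from the comment block above.
inline void conceptualIndirectCall(const FunctionDescriptor &FD, uint64_t &R2,
                                   uint64_t &R11, uint64_t &CTR) {
  CTR = FD.EntryPoint; // mtctr after loading the entry point
  R2 = FD.TOCAnchor;   // establish the callee's TOC
  R11 = FD.EnvPointer; // environment pointer (skipped when 'nest' is used)
  // a bctrl would transfer control to CTR here
}
} // namespace descriptor_sketch
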
5599
5600static void
5601buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5602 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5603 SelectionDAG &DAG,
5604 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5605 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5606 const PPCSubtarget &Subtarget) {
5607 const bool IsPPC64 = Subtarget.isPPC64();
5608 // MVT for a general purpose register.
5609 const MVT RegVT = Subtarget.getScalarIntVT();
5610
5611 // First operand is always the chain.
5612 Ops.push_back(Chain);
5613
5614 // If it's a direct call pass the callee as the second operand.
5615 if (!CFlags.IsIndirect)
5616 Ops.push_back(Callee);
5617 else {
5618 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5619
5620 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5621 // on the stack (this would have been done in `LowerCall_64SVR4` or
5622 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5623 // represents both the indirect branch and a load that restores the TOC
5624 // pointer from the linkage area. The operand for the TOC restore is an add
5625 // of the TOC save offset to the stack pointer. This must be the second
5626 // operand: after the chain input but before any other variadic arguments.
5627 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5628 // saved or used.
5629 if (isTOCSaveRestoreRequired(Subtarget)) {
5630 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5631
5632 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5633 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5634 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5635 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5636 Ops.push_back(AddTOC);
5637 }
5638
5639 // Add the register used for the environment pointer.
5640 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5641 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5642 RegVT));
5643
5644
5645 // Add CTR register as callee so a bctr can be emitted later.
5646 if (CFlags.IsTailCall)
5647 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5648 }
5649
5650 // If this is a tail call add stack pointer delta.
5651 if (CFlags.IsTailCall)
5652 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5653
5654 // Add argument registers to the end of the list so that they are known live
5655 // into the call.
5656 for (const auto &[Reg, N] : RegsToPass)
5657 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
5658
5659 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5660 // no way to mark dependencies as implicit here.
5661 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5662 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5663 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5664 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5665
5666 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5667 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5668 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5669
5670 // Add a register mask operand representing the call-preserved registers.
5671 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5672 const uint32_t *Mask =
5673 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5674 assert(Mask && "Missing call preserved mask for calling convention");
5675 Ops.push_back(DAG.getRegisterMask(Mask));
5676
5677 // If the glue is valid, it is the last operand.
5678 if (Glue.getNode())
5679 Ops.push_back(Glue);
5680}
5681
5682SDValue PPCTargetLowering::FinishCall(
5683 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5684 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5685 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5686 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5687 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5688
5689 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5690 Subtarget.isAIXABI())
5691 setUsesTOCBasePtr(DAG);
5692
5693 unsigned CallOpc =
5694 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5695 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5696
5697 if (!CFlags.IsIndirect)
5698 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5699 else if (Subtarget.usesFunctionDescriptors())
5700 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5701 dl, CFlags.HasNest, Subtarget);
5702 else
5703 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5704
5705 // Build the operand list for the call instruction.
5706  SmallVector<SDValue, 8> Ops;
5707 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5708 SPDiff, Subtarget);
5709
5710 // Emit tail call.
5711 if (CFlags.IsTailCall) {
5712 // Indirect tail calls when using PC Relative calls do not have the same
5713 // constraints.
5714 assert(((Callee.getOpcode() == ISD::Register &&
5715 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5716 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5717 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5718 isa<ConstantSDNode>(Callee) ||
5719 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5720 "Expecting a global address, external symbol, absolute value, "
5721 "register or an indirect tail call when PC Relative calls are "
5722 "used.");
5723 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5724 assert(CallOpc == PPCISD::TC_RETURN &&
5725 "Unexpected call opcode for a tail call.");
5726    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5727 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5728 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5729 return Ret;
5730 }
5731
5732 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5733 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5734 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5735 Glue = Chain.getValue(1);
5736
5737 // When performing tail call optimization the callee pops its arguments off
5738 // the stack. Account for this here so these bytes can be pushed back on in
5739 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5740 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5741                         getTargetMachine().Options.GuaranteedTailCallOpt)
5742 ? NumBytes
5743 : 0;
5744
5745 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5746 Glue = Chain.getValue(1);
5747
5748 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5749 DAG, InVals);
5750}
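
// Sketch of the CALLSEQ_END accounting above: under GuaranteedTailCallOpt a
// fastcc callee pops its own argument area, so those bytes are reported as
// callee-popped and pushed back on in eliminateCallFramePseudoInstr; otherwise
// the caller cleans up. Hypothetical helper mirroring that decision:
namespace callseq_sketch {
inline unsigned bytesCalleePops(bool IsFastCC, bool GuaranteedTailCallOpt,
                                unsigned NumBytes) {
  return (IsFastCC && GuaranteedTailCallOpt) ? NumBytes : 0;
}
} // namespace callseq_sketch
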
5751
5752bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5753 CallingConv::ID CalleeCC = CB->getCallingConv();
5754 const Function *CallerFunc = CB->getCaller();
5755 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5756 const Function *CalleeFunc = CB->getCalledFunction();
5757 if (!CalleeFunc)
5758 return false;
5759 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5760
5761  SmallVector<ISD::OutputArg, 2> Outs;
5762  SmallVector<ISD::InputArg, 2> Ins;
5763
5764 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5765 CalleeFunc->getAttributes(), Outs, *this,
5766 CalleeFunc->getDataLayout());
5767
5768 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5769 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5770 false /*isCalleeExternalSymbol*/);
5771}
5772
5773bool PPCTargetLowering::isEligibleForTCO(
5774 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5775 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5776    const SmallVectorImpl<ISD::OutputArg> &Outs,
5777 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5778 bool isCalleeExternalSymbol) const {
5779 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5780 return false;
5781
5782 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5783 return IsEligibleForTailCallOptimization_64SVR4(
5784 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5785 isCalleeExternalSymbol);
5786 else
5787 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5788 isVarArg, Ins);
5789}
5790
5791SDValue
5792PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5793 SmallVectorImpl<SDValue> &InVals) const {
5794 SelectionDAG &DAG = CLI.DAG;
5795 SDLoc &dl = CLI.DL;
5796  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5797 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5798  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5799 SDValue Chain = CLI.Chain;
5800 SDValue Callee = CLI.Callee;
5801 bool &isTailCall = CLI.IsTailCall;
5802 CallingConv::ID CallConv = CLI.CallConv;
5803 bool isVarArg = CLI.IsVarArg;
5804 bool isPatchPoint = CLI.IsPatchPoint;
5805 const CallBase *CB = CLI.CB;
5806
5807 if (isTailCall) {
5808    MachineFunction &MF = DAG.getMachineFunction();
5809 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5810 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5811 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5812 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5813
5814 isTailCall =
5815 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5816 &(MF.getFunction()), IsCalleeExternalSymbol);
5817 if (isTailCall) {
5818 ++NumTailCalls;
5819 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5820 ++NumSiblingCalls;
5821
5822 // PC Relative calls no longer guarantee that the callee is a Global
5823 // Address Node. The callee could be an indirect tail call in which
5824 // case the SDValue for the callee could be a load (to load the address
5825 // of a function pointer) or it may be a register copy (to move the
5826 // address of the callee from a function parameter into a virtual
5827 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5828 assert((Subtarget.isUsingPCRelativeCalls() ||
5829 isa<GlobalAddressSDNode>(Callee)) &&
5830 "Callee should be an llvm::Function object.");
5831
5832 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5833 << "\nTCO callee: ");
5834 LLVM_DEBUG(Callee.dump());
5835 }
5836 }
5837
5838 if (!isTailCall && CB && CB->isMustTailCall())
5839 report_fatal_error("failed to perform tail call elimination on a call "
5840 "site marked musttail");
5841
5842 // When long calls (i.e. indirect calls) are always used, calls are always
5843 // made via function pointer. If we have a function name, first translate it
5844 // into a pointer.
5845 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5846 !isTailCall)
5847 Callee = LowerGlobalAddress(Callee, DAG);
5848
5849 CallFlags CFlags(
5850 CallConv, isTailCall, isVarArg, isPatchPoint,
5851 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5852 // hasNest
5853 Subtarget.is64BitELFABI() &&
5854 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5855 CLI.NoMerge);
5856
5857 if (Subtarget.isAIXABI())
5858 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5859 InVals, CB);
5860
5861 assert(Subtarget.isSVR4ABI());
5862 if (Subtarget.isPPC64())
5863 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5864 InVals, CB);
5865 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5866 InVals, CB);
5867}
5868
5869SDValue PPCTargetLowering::LowerCall_32SVR4(
5870 SDValue Chain, SDValue Callee, CallFlags CFlags,
5871    const SmallVectorImpl<ISD::OutputArg> &Outs,
5872 const SmallVectorImpl<SDValue> &OutVals,
5873 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5874    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5875 const CallBase *CB) const {
5876 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5877 // of the 32-bit SVR4 ABI stack frame layout.
5878
5879 const CallingConv::ID CallConv = CFlags.CallConv;
5880 const bool IsVarArg = CFlags.IsVarArg;
5881 const bool IsTailCall = CFlags.IsTailCall;
5882
5883 assert((CallConv == CallingConv::C ||
5884 CallConv == CallingConv::Cold ||
5885 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5886
5887 const Align PtrAlign(4);
5888
5889 MachineFunction &MF = DAG.getMachineFunction();
5890
5891 // Mark this function as potentially containing a tail call. As a consequence,
5892 // the frame pointer will be used for dynamic stack allocation and for
5893 // restoring the caller's stack pointer in this function's epilogue. This is
5894 // done because the tail-called function might overwrite the value in this
5895 // function's (MF) stack pointer stack slot 0(SP).
5896 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5897 CallConv == CallingConv::Fast)
5898 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5899
5900 // Count how many bytes are to be pushed on the stack, including the linkage
5901 // area, parameter list area and the part of the local variable space which
5902 // contains copies of aggregates which are passed by value.
5903
5904 // Assign locations to all of the outgoing arguments.
5905  SmallVector<CCValAssign, 16> ArgLocs;
5906 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5907
5908 // Reserve space for the linkage area on the stack.
5909 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5910 PtrAlign);
5911
5912 if (IsVarArg) {
5913 // Handle fixed and variable vector arguments differently.
5914 // Fixed vector arguments go into registers as long as registers are
5915 // available. Variable vector arguments always go into memory.
5916 unsigned NumArgs = Outs.size();
5917
5918 for (unsigned i = 0; i != NumArgs; ++i) {
5919 MVT ArgVT = Outs[i].VT;
5920 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5921 bool Result;
5922
5923 if (!ArgFlags.isVarArg()) {
5924 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5925 Outs[i].OrigTy, CCInfo);
5926 } else {
5927        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5928 ArgFlags, Outs[i].OrigTy, CCInfo);
5929 }
5930
5931 if (Result) {
5932#ifndef NDEBUG
5933 errs() << "Call operand #" << i << " has unhandled type "
5934 << ArgVT << "\n";
5935#endif
5936 llvm_unreachable(nullptr);
5937 }
5938 }
5939 } else {
5940 // All arguments are treated the same.
5941 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5942 }
5943
5944 // Assign locations to all of the outgoing aggregate by value arguments.
5945 SmallVector<CCValAssign, 16> ByValArgLocs;
5946 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5947
5948 // Reserve stack space for the allocations in CCInfo.
5949 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
5950
5951 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5952
5953 // Size of the linkage area, parameter list area and the part of the local
5954 // space variable where copies of aggregates which are passed by value are
5955 // stored.
5956 unsigned NumBytes = CCByValInfo.getStackSize();
5957
5958 // Calculate by how many bytes the stack has to be adjusted in case of tail
5959 // call optimization.
5960 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5961
5962 // Adjust the stack pointer for the new arguments...
5963 // These operations are automatically eliminated by the prolog/epilog pass
5964 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5965 SDValue CallSeqStart = Chain;
5966
5967 // Load the return address and frame pointer so they can be moved somewhere
5968 // later.
5969 SDValue LROp, FPOp;
5970 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5971
5972 // Set up a copy of the stack pointer for use loading and storing any
5973 // arguments that may not fit in the registers available for argument
5974 // passing.
5975 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5976
5977  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5978 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5979 SmallVector<SDValue, 8> MemOpChains;
5980
5981 bool seenFloatArg = false;
5982 // Walk the register/memloc assignments, inserting copies/loads.
5983 // i - Tracks the index into the list of registers allocated for the call
5984 // RealArgIdx - Tracks the index into the list of actual function arguments
5985 // j - Tracks the index into the list of byval arguments
5986 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5987 i != e;
5988 ++i, ++RealArgIdx) {
5989 CCValAssign &VA = ArgLocs[i];
5990 SDValue Arg = OutVals[RealArgIdx];
5991 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5992
5993 if (Flags.isByVal()) {
5994 // Argument is an aggregate which is passed by value, thus we need to
5995 // create a copy of it in the local variable space of the current stack
5996 // frame (which is the stack frame of the caller) and pass the address of
5997 // this copy to the callee.
5998 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5999 CCValAssign &ByValVA = ByValArgLocs[j++];
6000 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6001
6002 // Memory reserved in the local variable space of the callers stack frame.
6003 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6004
6005 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6006 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6007 StackPtr, PtrOff);
6008
6009 // Create a copy of the argument in the local area of the current
6010 // stack frame.
6011 SDValue MemcpyCall =
6012 CreateCopyOfByValArgument(Arg, PtrOff,
6013 CallSeqStart.getNode()->getOperand(0),
6014 Flags, DAG, dl);
6015
6016 // This must go outside the CALLSEQ_START..END.
6017 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6018 SDLoc(MemcpyCall));
6019 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6020 NewCallSeqStart.getNode());
6021 Chain = CallSeqStart = NewCallSeqStart;
6022
6023 // Pass the address of the aggregate copy on the stack either in a
6024 // physical register or in the parameter list area of the current stack
6025 // frame to the callee.
6026 Arg = PtrOff;
6027 }
6028
6029 // When useCRBits() is true, there can be i1 arguments.
6030 // It is because getRegisterType(MVT::i1) => MVT::i1,
6031 // and for other integer types getRegisterType() => MVT::i32.
6032 // Extend i1 and ensure callee will get i32.
6033 if (Arg.getValueType() == MVT::i1)
6034 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6035 dl, MVT::i32, Arg);
6036
6037 if (VA.isRegLoc()) {
6038 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6039 // Put argument in a physical register.
6040 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6041 bool IsLE = Subtarget.isLittleEndian();
6042 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6043 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6044 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6045 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6046 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6047 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6048 SVal.getValue(0)));
6049 } else
6050 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6051 } else {
6052 // Put argument in the parameter list area of the current stack frame.
6053 assert(VA.isMemLoc());
6054 unsigned LocMemOffset = VA.getLocMemOffset();
6055
6056 if (!IsTailCall) {
6057 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6058 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6059 StackPtr, PtrOff);
6060
6061 MemOpChains.push_back(
6062 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6063 } else {
6064 // Calculate and remember argument location.
6065 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6066 TailCallArguments);
6067 }
6068 }
6069 }
6070
6071 if (!MemOpChains.empty())
6072 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6073
6074 // Build a sequence of copy-to-reg nodes chained together with token chain
6075 // and flag operands which copy the outgoing args into the appropriate regs.
6076 SDValue InGlue;
6077 for (const auto &[Reg, N] : RegsToPass) {
6078 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6079 InGlue = Chain.getValue(1);
6080 }
6081
6082 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6083 // registers.
6084 if (IsVarArg) {
6085 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6086 SDValue Ops[] = { Chain, InGlue };
6087
6088 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6089 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6090
6091 InGlue = Chain.getValue(1);
6092 }
6093
6094 if (IsTailCall)
6095 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6096 TailCallArguments);
6097
6098 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6099 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6100}
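
// Sketch of why seenFloatArg drives CR6SET/CR6UNSET above: the 32-bit SVR4
// ABI uses CR bit 6 to tell a vararg callee whether any floating-point
// arguments arrived in FPRs, so its va_arg machinery knows whether the FPR
// save area holds live values. Standalone illustration (printf is used here
// only as a familiar vararg callee):
#include <cstdio>

namespace cr6_sketch {
inline void varargCalls() {
  std::printf("%d\n", 42);   // no FP argument in an FPR -> CR6 cleared
  std::printf("%f\n", 3.14); // double passed in an FPR  -> CR6 set
}
} // namespace cr6_sketch
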
6101
6102// Copy an argument into memory, being careful to do this outside the
6103// call sequence for the call to which the argument belongs.
6104SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6105 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6106 SelectionDAG &DAG, const SDLoc &dl) const {
6107 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6108 CallSeqStart.getNode()->getOperand(0),
6109 Flags, DAG, dl);
6110 // The MEMCPY must go outside the CALLSEQ_START..END.
6111 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6112 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6113 SDLoc(MemcpyCall));
6114 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6115 NewCallSeqStart.getNode());
6116 return NewCallSeqStart;
6117}
6118
6119SDValue PPCTargetLowering::LowerCall_64SVR4(
6120 SDValue Chain, SDValue Callee, CallFlags CFlags,
6121    const SmallVectorImpl<ISD::OutputArg> &Outs,
6122 const SmallVectorImpl<SDValue> &OutVals,
6123 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6124    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6125 const CallBase *CB) const {
6126 bool isELFv2ABI = Subtarget.isELFv2ABI();
6127 bool isLittleEndian = Subtarget.isLittleEndian();
6128 unsigned NumOps = Outs.size();
6129 bool IsSibCall = false;
6130 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6131
6132 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6133 unsigned PtrByteSize = 8;
6134
6135 MachineFunction &MF = DAG.getMachineFunction();
6136
6137 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6138 IsSibCall = true;
6139
6140 // Mark this function as potentially containing a tail call. As a consequence,
6141 // the frame pointer will be used for dynamic stack allocation and for
6142 // restoring the caller's stack pointer in this function's epilogue. This is
6143 // done because the tail-called function might overwrite the value in this
6144 // function's (MF) stack pointer stack slot 0(SP).
6145 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6146 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6147
6148 assert(!(IsFastCall && CFlags.IsVarArg) &&
6149 "fastcc not supported on varargs functions");
6150
6151 // Count how many bytes are to be pushed on the stack, including the linkage
6152 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6153 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6154 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6155 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6156 unsigned NumBytes = LinkageSize;
6157 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6158
6159 static const MCPhysReg GPR[] = {
6160 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6161 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6162 };
6163 static const MCPhysReg VR[] = {
6164 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6165 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6166 };
6167
6168 const unsigned NumGPRs = std::size(GPR);
6169 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6170 const unsigned NumVRs = std::size(VR);
6171
6172 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6173 // can be passed to the callee in registers.
6174 // For the fast calling convention, there is another check below.
6175 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6176 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6177 if (!HasParameterArea) {
6178 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6179 unsigned AvailableFPRs = NumFPRs;
6180 unsigned AvailableVRs = NumVRs;
6181 unsigned NumBytesTmp = NumBytes;
6182 for (unsigned i = 0; i != NumOps; ++i) {
6183 if (Outs[i].Flags.isNest()) continue;
6184 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6185 PtrByteSize, LinkageSize, ParamAreaSize,
6186 NumBytesTmp, AvailableFPRs, AvailableVRs))
6187 HasParameterArea = true;
6188 }
6189 }
6190
6191 // When using the fast calling convention, we don't provide backing for
6192 // arguments that will be in registers.
6193 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6194
6195 // Avoid allocating parameter area for fastcc functions if all the arguments
6196 // can be passed in the registers.
6197 if (IsFastCall)
6198 HasParameterArea = false;
6199
6200 // Add up all the space actually used.
6201 for (unsigned i = 0; i != NumOps; ++i) {
6202 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6203 EVT ArgVT = Outs[i].VT;
6204 EVT OrigVT = Outs[i].ArgVT;
6205
6206 if (Flags.isNest())
6207 continue;
6208
6209 if (IsFastCall) {
6210 if (Flags.isByVal()) {
6211 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6212 if (NumGPRsUsed > NumGPRs)
6213 HasParameterArea = true;
6214 } else {
6215 switch (ArgVT.getSimpleVT().SimpleTy) {
6216 default: llvm_unreachable("Unexpected ValueType for argument!");
6217 case MVT::i1:
6218 case MVT::i32:
6219 case MVT::i64:
6220 if (++NumGPRsUsed <= NumGPRs)
6221 continue;
6222 break;
6223 case MVT::v4i32:
6224 case MVT::v8i16:
6225 case MVT::v16i8:
6226 case MVT::v2f64:
6227 case MVT::v2i64:
6228 case MVT::v1i128:
6229 case MVT::f128:
6230 if (++NumVRsUsed <= NumVRs)
6231 continue;
6232 break;
6233 case MVT::v4f32:
6234 if (++NumVRsUsed <= NumVRs)
6235 continue;
6236 break;
6237 case MVT::f32:
6238 case MVT::f64:
6239 if (++NumFPRsUsed <= NumFPRs)
6240 continue;
6241 break;
6242 }
6243 HasParameterArea = true;
6244 }
6245 }
6246
6247 /* Respect alignment of argument on the stack. */
6248 auto Alignment =
6249 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6250 NumBytes = alignTo(NumBytes, Alignment);
6251
6252 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6253 if (Flags.isInConsecutiveRegsLast())
6254 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6255 }
6256
6257 unsigned NumBytesActuallyUsed = NumBytes;
6258
6259 // In the old ELFv1 ABI,
6260 // the prolog code of the callee may store up to 8 GPR argument registers to
6261 // the stack, allowing va_start to index over them in memory if it is varargs.
6262 // Because we cannot tell if this is needed on the caller side, we have to
6263 // conservatively assume that it is needed. As such, make sure we have at
6264 // least enough stack space for the caller to store the 8 GPRs.
6265 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6266 // really requires memory operands, e.g. a vararg function.
6267 if (HasParameterArea)
6268 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6269 else
6270 NumBytes = LinkageSize;
6271
6272 // Tail call needs the stack to be aligned.
6273 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6274 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6275
6276 int SPDiff = 0;
6277
6278 // Calculate by how many bytes the stack has to be adjusted in case of tail
6279 // call optimization.
6280 if (!IsSibCall)
6281 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6282
6283 // To protect arguments on the stack from being clobbered in a tail call,
6284 // force all the loads to happen before doing any other lowering.
6285 if (CFlags.IsTailCall)
6286 Chain = DAG.getStackArgumentTokenFactor(Chain);
6287
6288 // Adjust the stack pointer for the new arguments...
6289 // These operations are automatically eliminated by the prolog/epilog pass
6290 if (!IsSibCall)
6291 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6292 SDValue CallSeqStart = Chain;
6293
6294 // Load the return address and frame pointer so they can be moved somewhere
6295 // later.
6296 SDValue LROp, FPOp;
6297 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6298
6299 // Set up a copy of the stack pointer for use loading and storing any
6300 // arguments that may not fit in the registers available for argument
6301 // passing.
6302 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6303
6304 // Figure out which arguments are going to go in registers, and which in
6305 // memory. Also, if this is a vararg function, floating point operations
6306 // must be stored to our stack, and loaded into integer regs as well, if
6307 // any integer regs are available for argument passing.
6308 unsigned ArgOffset = LinkageSize;
6309
6310  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6311 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6312
6313 SmallVector<SDValue, 8> MemOpChains;
6314 for (unsigned i = 0; i != NumOps; ++i) {
6315 SDValue Arg = OutVals[i];
6316 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6317 EVT ArgVT = Outs[i].VT;
6318 EVT OrigVT = Outs[i].ArgVT;
6319
6320 // PtrOff will be used to store the current argument to the stack if a
6321 // register cannot be found for it.
6322 SDValue PtrOff;
6323
6324 // We re-align the argument offset for each argument, except when using the
6325 // fast calling convention, when we need to make sure we do that only when
6326 // we'll actually use a stack slot.
6327 auto ComputePtrOff = [&]() {
6328 /* Respect alignment of argument on the stack. */
6329 auto Alignment =
6330 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6331 ArgOffset = alignTo(ArgOffset, Alignment);
6332
6333 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6334
6335 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6336 };
6337
6338 if (!IsFastCall) {
6339 ComputePtrOff();
6340
6341 /* Compute GPR index associated with argument offset. */
6342 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6343 GPR_idx = std::min(GPR_idx, NumGPRs);
6344 }
6345
6346 // Promote integers to 64-bit values.
6347 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6348 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6349 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6350 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6351 }
6352
6353 // FIXME memcpy is used way more than necessary. Correctness first.
6354 // Note: "by value" is code for passing a structure by value, not
6355 // basic types.
6356 if (Flags.isByVal()) {
6357 // Note: Size includes alignment padding, so
6358 // struct x { short a; char b; }
6359 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6360 // These are the proper values we need for right-justifying the
6361 // aggregate in a parameter register.
6362 unsigned Size = Flags.getByValSize();
6363
6364 // An empty aggregate parameter takes up no storage and no
6365 // registers.
6366 if (Size == 0)
6367 continue;
6368
6369 if (IsFastCall)
6370 ComputePtrOff();
6371
6372 // All aggregates smaller than 8 bytes must be passed right-justified.
6373 if (Size==1 || Size==2 || Size==4) {
6374 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6375 if (GPR_idx != NumGPRs) {
6376 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6377 MachinePointerInfo(), VT);
6378 MemOpChains.push_back(Load.getValue(1));
6379 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6380
6381 ArgOffset += PtrByteSize;
6382 continue;
6383 }
6384 }
6385
6386 if (GPR_idx == NumGPRs && Size < 8) {
6387 SDValue AddPtr = PtrOff;
6388 if (!isLittleEndian) {
6389 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6390 PtrOff.getValueType());
6391 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6392 }
6393 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6394 CallSeqStart,
6395 Flags, DAG, dl);
6396 ArgOffset += PtrByteSize;
6397 continue;
6398 }
6399 // Copy the object to the parameter save area if it cannot be entirely passed
6400 // by registers.
6401 // FIXME: we only need to copy the parts which need to be passed in
6402 // parameter save area. For the parts passed by registers, we don't need
6403 // to copy them to the stack although we need to allocate space for them
6404 // in parameter save area.
6405 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6406 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6407 CallSeqStart,
6408 Flags, DAG, dl);
6409
6410 // When a register is available, pass a small aggregate right-justified.
6411 if (Size < 8 && GPR_idx != NumGPRs) {
6412 // The easiest way to get this right-justified in a register
6413 // is to copy the structure into the rightmost portion of a
6414 // local variable slot, then load the whole slot into the
6415 // register.
6416 // FIXME: The memcpy seems to produce pretty awful code for
6417 // small aggregates, particularly for packed ones.
6418 // FIXME: It would be preferable to use the slot in the
6419 // parameter save area instead of a new local variable.
6420 SDValue AddPtr = PtrOff;
6421 if (!isLittleEndian) {
6422 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6423 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6424 }
6425 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6426 CallSeqStart,
6427 Flags, DAG, dl);
6428
6429 // Load the slot into the register.
6430 SDValue Load =
6431 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6432 MemOpChains.push_back(Load.getValue(1));
6433 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6434
6435 // Done with this argument.
6436 ArgOffset += PtrByteSize;
6437 continue;
6438 }
6439
6440 // For aggregates larger than PtrByteSize, copy the pieces of the
6441 // object that fit into registers from the parameter save area.
6442 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6443 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6444 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6445 if (GPR_idx != NumGPRs) {
6446 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6447 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6448 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6449 MachinePointerInfo(), ObjType);
6450
6451 MemOpChains.push_back(Load.getValue(1));
6452 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6453 ArgOffset += PtrByteSize;
6454 } else {
6455 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6456 break;
6457 }
6458 }
6459 continue;
6460 }
6461
6462 switch (Arg.getSimpleValueType().SimpleTy) {
6463 default: llvm_unreachable("Unexpected ValueType for argument!");
6464 case MVT::i1:
6465 case MVT::i32:
6466 case MVT::i64:
6467 if (Flags.isNest()) {
6468 // The 'nest' parameter, if any, is passed in R11.
6469 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6470 break;
6471 }
6472
6473 // These can be scalar arguments or elements of an integer array type
6474 // passed directly. Clang may use those instead of "byval" aggregate
6475 // types to avoid forcing arguments to memory unnecessarily.
6476 if (GPR_idx != NumGPRs) {
6477 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6478 } else {
6479 if (IsFastCall)
6480 ComputePtrOff();
6481
6482 assert(HasParameterArea &&
6483 "Parameter area must exist to pass an argument in memory.");
6484 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6485 true, CFlags.IsTailCall, false, MemOpChains,
6486 TailCallArguments, dl);
6487 if (IsFastCall)
6488 ArgOffset += PtrByteSize;
6489 }
6490 if (!IsFastCall)
6491 ArgOffset += PtrByteSize;
6492 break;
6493 case MVT::f32:
6494 case MVT::f64: {
6495 // These can be scalar arguments or elements of a float array type
6496 // passed directly. The latter are used to implement ELFv2 homogeneous
6497 // float aggregates.
6498
6499 // Named arguments go into FPRs first, and once they overflow, the
6500 // remaining arguments go into GPRs and then the parameter save area.
6501 // Unnamed arguments for vararg functions always go to GPRs and
6502 // then the parameter save area. For now, put all arguments to vararg
6503 // routines always in both locations (FPR *and* GPR or stack slot).
6504 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6505 bool NeededLoad = false;
6506
6507 // First load the argument into the next available FPR.
6508 if (FPR_idx != NumFPRs)
6509 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6510
6511 // Next, load the argument into GPR or stack slot if needed.
6512 if (!NeedGPROrStack)
6513 ;
6514 else if (GPR_idx != NumGPRs && !IsFastCall) {
6515 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6516 // once we support fp <-> gpr moves.
6517
6518 // In the non-vararg case, this can only ever happen in the
6519 // presence of f32 array types, since otherwise we never run
6520 // out of FPRs before running out of GPRs.
6521 SDValue ArgVal;
6522
6523 // Double values are always passed in a single GPR.
6524 if (Arg.getValueType() != MVT::f32) {
6525 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6526
6527 // Non-array float values are extended and passed in a GPR.
6528 } else if (!Flags.isInConsecutiveRegs()) {
6529 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6530 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6531
6532 // If we have an array of floats, we collect every odd element
6533 // together with its predecessor into one GPR.
6534 } else if (ArgOffset % PtrByteSize != 0) {
6535 SDValue Lo, Hi;
6536 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6537 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6538 if (!isLittleEndian)
6539 std::swap(Lo, Hi);
6540 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6541
6542 // The final element, if even, goes into the first half of a GPR.
6543 } else if (Flags.isInConsecutiveRegsLast()) {
6544 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6545 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6546 if (!isLittleEndian)
6547 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6548 DAG.getConstant(32, dl, MVT::i32));
6549
6550 // Non-final even elements are skipped; they will be handled
6551 // together with the subsequent argument on the next go-around.
6552 } else
6553 ArgVal = SDValue();
6554
6555 if (ArgVal.getNode())
6556 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6557 } else {
6558 if (IsFastCall)
6559 ComputePtrOff();
6560
6561 // Single-precision floating-point values are mapped to the
6562 // second (rightmost) word of the stack doubleword.
6563 if (Arg.getValueType() == MVT::f32 &&
6564 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6565 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6566 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6567 }
6568
6569 assert(HasParameterArea &&
6570 "Parameter area must exist to pass an argument in memory.");
6571 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6572 true, CFlags.IsTailCall, false, MemOpChains,
6573 TailCallArguments, dl);
6574
6575 NeededLoad = true;
6576 }
6577 // When passing an array of floats, the array occupies consecutive
6578 // space in the argument area; only round up to the next doubleword
6579 // at the end of the array. Otherwise, each float takes 8 bytes.
6580 if (!IsFastCall || NeededLoad) {
6581 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6582 Flags.isInConsecutiveRegs()) ? 4 : 8;
6583 if (Flags.isInConsecutiveRegsLast())
6584 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6585 }
6586 break;
6587 }
6588 case MVT::v4f32:
6589 case MVT::v4i32:
6590 case MVT::v8i16:
6591 case MVT::v16i8:
6592 case MVT::v2f64:
6593 case MVT::v2i64:
6594 case MVT::v1i128:
6595 case MVT::f128:
6596 // These can be scalar arguments or elements of a vector array type
6597 // passed directly. The latter are used to implement ELFv2 homogeneous
6598 // vector aggregates.
6599
6600 // For a varargs call, named arguments go into VRs or on the stack as
6601 // usual; unnamed arguments always go to the stack or the corresponding
6602 // GPRs when within range. For now, we always put the value in both
6603 // locations (or even all three).
6604 if (CFlags.IsVarArg) {
6605 assert(HasParameterArea &&
6606 "Parameter area must exist if we have a varargs call.");
6607 // We could elide this store in the case where the object fits
6608 // entirely in R registers. Maybe later.
6609 SDValue Store =
6610 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6611 MemOpChains.push_back(Store);
6612 if (VR_idx != NumVRs) {
6613 SDValue Load =
6614 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6615 MemOpChains.push_back(Load.getValue(1));
6616 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6617 }
6618 ArgOffset += 16;
6619 for (unsigned i=0; i<16; i+=PtrByteSize) {
6620 if (GPR_idx == NumGPRs)
6621 break;
6622 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6623 DAG.getConstant(i, dl, PtrVT));
6624 SDValue Load =
6625 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6626 MemOpChains.push_back(Load.getValue(1));
6627 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6628 }
6629 break;
6630 }
6631
6632 // Non-varargs Altivec params go into VRs or on the stack.
6633 if (VR_idx != NumVRs) {
6634 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6635 } else {
6636 if (IsFastCall)
6637 ComputePtrOff();
6638
6639 assert(HasParameterArea &&
6640 "Parameter area must exist to pass an argument in memory.");
6641 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6642 true, CFlags.IsTailCall, true, MemOpChains,
6643 TailCallArguments, dl);
6644 if (IsFastCall)
6645 ArgOffset += 16;
6646 }
6647
6648 if (!IsFastCall)
6649 ArgOffset += 16;
6650 break;
6651 }
6652 }
6653
6654 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6655 "mismatch in size of parameter area");
6656 (void)NumBytesActuallyUsed;
6657
6658 if (!MemOpChains.empty())
6659 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6660
6661 // Check if this is an indirect call (MTCTR/BCTRL).
6662 // See prepareDescriptorIndirectCall and buildCallOperands for more
6663 // information about calls through function pointers in the 64-bit SVR4 ABI.
6664 if (CFlags.IsIndirect) {
6665 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6666 // caller in the TOC save area.
6667 if (isTOCSaveRestoreRequired(Subtarget)) {
6668 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6669 // Load r2 into a virtual register and store it to the TOC save area.
6670 setUsesTOCBasePtr(DAG);
6671 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6672 // TOC save area offset.
6673 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6674 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6675 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6676 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6677 MachinePointerInfo::getStack(
6678 DAG.getMachineFunction(), TOCSaveOffset));
6679 }
6680 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6681 // This does not mean the MTCTR instruction must use R12; it's easier
6682 // to model this as an extra parameter, so do that.
6683 if (isELFv2ABI && !CFlags.IsPatchPoint)
6684 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6685 }
6686
6687 // Build a sequence of copy-to-reg nodes chained together with token chain
6688 // and flag operands which copy the outgoing args into the appropriate regs.
6689 SDValue InGlue;
6690 for (const auto &[Reg, N] : RegsToPass) {
6691 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6692 InGlue = Chain.getValue(1);
6693 }
6694
6695 if (CFlags.IsTailCall && !IsSibCall)
6696 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6697 TailCallArguments);
6698
6699 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6700 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6701}
6702
6703// Returns true when the shadow of a general purpose argument register
6704// in the parameter save area is aligned to at least 'RequiredAlign'.
6705static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6706 assert(RequiredAlign.value() <= 16 &&
6707 "Required alignment greater than stack alignment.");
6708 switch (Reg) {
6709 default:
6710 report_fatal_error("called on invalid register.");
6711 case PPC::R5:
6712 case PPC::R9:
6713 case PPC::X3:
6714 case PPC::X5:
6715 case PPC::X7:
6716 case PPC::X9:
6717 // These registers are 16-byte aligned, which is the strictest alignment
6718 // we can support.
6719 return true;
6720 case PPC::R3:
6721 case PPC::R7:
6722 case PPC::X4:
6723 case PPC::X6:
6724 case PPC::X8:
6725 case PPC::X10:
6726 // The shadow of these registers in the PSA is 8 byte aligned.
6727 return RequiredAlign <= 8;
6728 case PPC::R4:
6729 case PPC::R6:
6730 case PPC::R8:
6731 case PPC::R10:
6732 return RequiredAlign <= 4;
6733 }
6734}
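// The alignments above follow from the layout of the parameter save area:
// it starts right after the linkage area (24 bytes on PPC32, 48 bytes on
// PPC64) of a 16-byte aligned stack frame. For example, X3 shadows the word
// at SP + 48 (16-byte aligned) while X4 shadows SP + 56 (only 8-byte
// aligned); on PPC32, R5 shadows SP + 32 and R9 shadows SP + 48, the only
// two 16-byte aligned GPR shadows.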
6735
6736static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6737 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6738 Type *OrigTy, CCState &State) {
6739 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6740 State.getMachineFunction().getSubtarget());
6741 const bool IsPPC64 = Subtarget.isPPC64();
6742 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6743 const Align PtrAlign(PtrSize);
6744 const Align StackAlign(16);
6745 const MVT RegVT = Subtarget.getScalarIntVT();
6746
6747 if (ValVT == MVT::f128)
6748 report_fatal_error("f128 is unimplemented on AIX.");
6749
6750 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6751 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6752 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6753 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6754 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6755 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6756
6757 static const MCPhysReg VR[] = {// Vector registers.
6758 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6759 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6760 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6761
6762 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6763
6764 if (ArgFlags.isNest()) {
6765 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6766 if (!EnvReg)
6767 report_fatal_error("More than one nest argument.");
6768 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6769 return false;
6770 }
6771
6772 if (ArgFlags.isByVal()) {
6773 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6774 if (ByValAlign > StackAlign)
6775 report_fatal_error("Pass-by-value arguments with alignment greater than "
6776 "16 are not supported.");
6777
6778 const unsigned ByValSize = ArgFlags.getByValSize();
6779 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6780
6781 // An empty aggregate parameter takes up no storage and no registers,
6782 // but needs a MemLoc for a stack slot for the formal arguments side.
6783 if (ByValSize == 0) {
6784 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6785 State.getStackSize(), RegVT, LocInfo));
6786 return false;
6787 }
6788
6789 // Shadow allocate any registers that are not properly aligned.
6790 unsigned NextReg = State.getFirstUnallocated(GPRs);
6791 while (NextReg != GPRs.size() &&
6792 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6793 // Shadow allocate the next register since its alignment is not strict enough.
6794 MCRegister Reg = State.AllocateReg(GPRs);
6795 // Allocate the stack space shadowed by said register.
6796 State.AllocateStack(PtrSize, PtrAlign);
6797 assert(Reg && "Allocating register unexpectedly failed.");
6798 (void)Reg;
6799 NextReg = State.getFirstUnallocated(GPRs);
6800 }
6801
6802 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6803 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6804 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6805 if (MCRegister Reg = State.AllocateReg(GPRs))
6806 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6807 else {
6808 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6809 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6810 LocInfo));
6811 break;
6812 }
6813 }
6814 return false;
6815 }
6816
6817 // Arguments always reserve space in the parameter save area.
6818 switch (ValVT.SimpleTy) {
6819 default:
6820 report_fatal_error("Unhandled value type for argument.");
6821 case MVT::i64:
6822 // i64 arguments should have been split to i32 for PPC32.
6823 assert(IsPPC64 && "PPC32 should have split i64 values.");
6824 [[fallthrough]];
6825 case MVT::i1:
6826 case MVT::i32: {
6827 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6828 // AIX integer arguments are always passed in register width.
6829 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6830 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6831 : CCValAssign::LocInfo::ZExt;
6832 if (MCRegister Reg = State.AllocateReg(GPRs))
6833 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6834 else
6835 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6836
6837 return false;
6838 }
6839 case MVT::f32:
6840 case MVT::f64: {
6841 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6842 const unsigned StoreSize = LocVT.getStoreSize();
6843 // Floats are always 4-byte aligned in the PSA on AIX.
6844 // This includes f64 in 64-bit mode for ABI compatibility.
6845 const unsigned Offset =
6846 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6847 MCRegister FReg = State.AllocateReg(FPR);
6848 if (FReg)
6849 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6850
6851 // Reserve and initialize GPRs or initialize the PSA as required.
6852 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6853 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6854 assert(FReg && "An FPR should be available when a GPR is reserved.");
6855 if (State.isVarArg()) {
6856 // Successfully reserved GPRs are only initialized for vararg calls.
6857 // Custom handling is required for:
6858 // f64 in PPC32 needs to be split into 2 GPRs.
6859 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6860 State.addLoc(
6861 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6862 }
6863 } else {
6864 // If there are insufficient GPRs, the PSA needs to be initialized.
6865 // Initialization occurs even if an FPR was initialized for
6866 // compatibility with the AIX XL compiler. The full memory for the
6867 // argument will be initialized even if a prior word is saved in GPR.
6868 // A custom memLoc is used when the argument also passes in FPR so
6869 // that the callee handling can skip over it easily.
6870 State.addLoc(
6871 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6872 LocInfo)
6873 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6874 break;
6875 }
6876 }
6877
6878 return false;
6879 }
6880 case MVT::v4f32:
6881 case MVT::v4i32:
6882 case MVT::v8i16:
6883 case MVT::v16i8:
6884 case MVT::v2i64:
6885 case MVT::v2f64:
6886 case MVT::v1i128: {
6887 const unsigned VecSize = 16;
6888 const Align VecAlign(VecSize);
6889
6890 if (!State.isVarArg()) {
6891 // If there are vector registers remaining we don't consume any stack
6892 // space.
6893 if (MCRegister VReg = State.AllocateReg(VR)) {
6894 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6895 return false;
6896 }
6897 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6898 // might be allocated in the portion of the PSA that is shadowed by the
6899 // GPRs.
6900 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6901 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6902 return false;
6903 }
6904
6905 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6906 // Burn any underaligned registers and their shadowed stack space until
6907 // we reach the required alignment.
6908 while (NextRegIndex != GPRs.size() &&
6909 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6910 // Shadow allocate register and its stack shadow.
6911 MCRegister Reg = State.AllocateReg(GPRs);
6912 State.AllocateStack(PtrSize, PtrAlign);
6913 assert(Reg && "Allocating register unexpectedly failed.");
6914 (void)Reg;
6915 NextRegIndex = State.getFirstUnallocated(GPRs);
6916 }
6917
6918 // Vectors that are passed as fixed arguments are handled differently.
6919 // They are passed in VRs if any are available (unlike arguments passed
6920 // through an ellipsis) and shadow GPRs (unlike arguments to non-vararg
6921 // functions).
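// In other words, a named vector parameter of a variadic function is given
// a VR when one is available, and it still shadows the GPRs and the 16
// bytes of PSA its in-memory image would occupy, so later arguments keep
// their usual positions.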
6922 if (!ArgFlags.isVarArg()) {
6923 if (MCRegister VReg = State.AllocateReg(VR)) {
6924 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6925 // Shadow allocate GPRs and stack space even though we pass in a VR.
6926 for (unsigned I = 0; I != VecSize; I += PtrSize)
6927 State.AllocateReg(GPRs);
6928 State.AllocateStack(VecSize, VecAlign);
6929 return false;
6930 }
6931 // No vector registers remain so pass on the stack.
6932 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6933 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6934 return false;
6935 }
6936
6937 // If all GPRs are consumed, then we pass the argument fully on the stack.
6938 if (NextRegIndex == GPRs.size()) {
6939 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6940 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6941 return false;
6942 }
6943
6944 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6945 // half of the argument, and then need to pass the remaining half on the
6946 // stack.
6947 if (GPRs[NextRegIndex] == PPC::R9) {
6948 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6949 State.addLoc(
6950 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6951
6952 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
6953 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
6954 assert(FirstReg && SecondReg &&
6955 "Allocating R9 or R10 unexpectedly failed.");
6956 State.addLoc(
6957 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6958 State.addLoc(
6959 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6960 return false;
6961 }
6962
6963 // We have enough GPRs to fully pass the vector argument, and we have
6964 // already consumed any underaligned registers. Start with the custom
6965 // MemLoc and then the custom RegLocs.
6966 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6967 State.addLoc(
6968 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6969 for (unsigned I = 0; I != VecSize; I += PtrSize) {
6970 const MCRegister Reg = State.AllocateReg(GPRs);
6971 assert(Reg && "Failed to allocate register for vararg vector argument");
6972 State.addLoc(
6973 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6974 }
6975 return false;
6976 }
6977 }
6978 return true;
6979}
6980
6981// So far, this function is only used by LowerFormalArguments_AIX()
6982 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6983 bool IsPPC64,
6984 bool HasP8Vector,
6985 bool HasVSX) {
6986 assert((IsPPC64 || SVT != MVT::i64) &&
6987 "i64 should have been split for 32-bit codegen.");
6988
6989 switch (SVT) {
6990 default:
6991 report_fatal_error("Unexpected value type for formal argument");
6992 case MVT::i1:
6993 case MVT::i32:
6994 case MVT::i64:
6995 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6996 case MVT::f32:
6997 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6998 case MVT::f64:
6999 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7000 case MVT::v4f32:
7001 case MVT::v4i32:
7002 case MVT::v8i16:
7003 case MVT::v16i8:
7004 case MVT::v2i64:
7005 case MVT::v2f64:
7006 case MVT::v1i128:
7007 return &PPC::VRRCRegClass;
7008 }
7009}
7010
7011 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7012 SelectionDAG &DAG, SDValue ArgValue,
7013 MVT LocVT, const SDLoc &dl) {
7014 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7015 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7016
7017 if (Flags.isSExt())
7018 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7019 DAG.getValueType(ValVT));
7020 else if (Flags.isZExt())
7021 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7022 DAG.getValueType(ValVT));
7023
7024 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7025}
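// For example, an i32 formal argument that arrives in a 64-bit GPR
// (LocVT i64) is wrapped in AssertSext/AssertZext according to its
// extension flags and then truncated back to i32 before being handed to
// the rest of the DAG.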
7026
7027static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7028 const unsigned LASize = FL->getLinkageSize();
7029
7030 if (PPC::GPRCRegClass.contains(Reg)) {
7031 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7032 "Reg must be a valid argument register!");
7033 return LASize + 4 * (Reg - PPC::R3);
7034 }
7035
7036 if (PPC::G8RCRegClass.contains(Reg)) {
7037 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7038 "Reg must be a valid argument register!");
7039 return LASize + 8 * (Reg - PPC::X3);
7040 }
7041
7042 llvm_unreachable("Only general purpose registers expected.");
7043}
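// For example, R5 maps to LinkageSize + 8 on 32-bit AIX, and X7 maps to
// LinkageSize + 32 on 64-bit AIX.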
7044
7045// AIX ABI Stack Frame Layout:
7046//
7047// Low Memory +--------------------------------------------+
7048// SP +---> | Back chain | ---+
7049// | +--------------------------------------------+ |
7050// | | Saved Condition Register | |
7051// | +--------------------------------------------+ |
7052// | | Saved Linkage Register | |
7053// | +--------------------------------------------+ | Linkage Area
7054// | | Reserved for compilers | |
7055// | +--------------------------------------------+ |
7056// | | Reserved for binders | |
7057// | +--------------------------------------------+ |
7058// | | Saved TOC pointer | ---+
7059// | +--------------------------------------------+
7060// | | Parameter save area |
7061// | +--------------------------------------------+
7062// | | Alloca space |
7063// | +--------------------------------------------+
7064// | | Local variable space |
7065// | +--------------------------------------------+
7066// | | Float/int conversion temporary |
7067// | +--------------------------------------------+
7068// | | Save area for AltiVec registers |
7069// | +--------------------------------------------+
7070// | | AltiVec alignment padding |
7071// | +--------------------------------------------+
7072// | | Save area for VRSAVE register |
7073// | +--------------------------------------------+
7074// | | Save area for General Purpose registers |
7075// | +--------------------------------------------+
7076// | | Save area for Floating Point registers |
7077// | +--------------------------------------------+
7078// +---- | Back chain |
7079// High Memory +--------------------------------------------+
7080//
7081// Specifications:
7082// AIX 7.2 Assembler Language Reference
7083// Subroutine linkage convention
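// Note that the parameter save area begins immediately after the linkage
// area, i.e. at an offset of 24 bytes from the stack pointer on 32-bit AIX
// and 48 bytes on 64-bit AIX.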
7084
7085SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7086 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7087 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7088 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7089
7090 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7091 CallConv == CallingConv::Fast) &&
7092 "Unexpected calling convention!");
7093
7094 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7095 report_fatal_error("Tail call support is unimplemented on AIX.");
7096
7097 if (useSoftFloat())
7098 report_fatal_error("Soft float support is unimplemented on AIX.");
7099
7100 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7101
7102 const bool IsPPC64 = Subtarget.isPPC64();
7103 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7104
7105 // Assign locations to all of the incoming arguments.
7106 SmallVector<CCValAssign, 16> ArgLocs;
7107 MachineFunction &MF = DAG.getMachineFunction();
7108 MachineFrameInfo &MFI = MF.getFrameInfo();
7109 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7110 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7111
7112 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7113 // Reserve space for the linkage area on the stack.
7114 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7115 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7116 uint64_t SaveStackPos = CCInfo.getStackSize();
7117 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7118 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7119
7120 SmallVector<SDValue, 8> MemOps;
7121
7122 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7123 CCValAssign &VA = ArgLocs[I++];
7124 MVT LocVT = VA.getLocVT();
7125 MVT ValVT = VA.getValVT();
7126 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7127
7128 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7129 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7130 // For compatibility with the AIX XL compiler, the float args in the
7131 // parameter save area are initialized even if the argument is available
7132 // in a register. The caller is required to initialize both the register
7133 // and memory; however, the callee can choose to expect it in either.
7134 // The memloc is dismissed here because the argument is retrieved from
7135 // the register.
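// For example, an f64 that is assigned an FPR after the GPRs that would
// shadow its PSA slot are exhausted gets both an FPR RegLoc and a custom
// MemLoc; the custom MemLoc is what gets skipped just below.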
7136 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7137 continue;
7138
7139 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7140 const TargetRegisterClass *RegClass = getRegClassForSVT(
7141 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7142 // On PPC64, the debugger assumes extended 8-byte values are stored from a GPR.
7143 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7144 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7145 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7146 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7147 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7148 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7149 MachinePointerInfo(), Align(PtrByteSize));
7150 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7151 MemOps.push_back(StoreReg);
7152 }
7153
7154 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7155 unsigned StoreSize =
7156 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7157 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7158 }
7159
7160 auto HandleMemLoc = [&]() {
7161 const unsigned LocSize = LocVT.getStoreSize();
7162 const unsigned ValSize = ValVT.getStoreSize();
7163 assert((ValSize <= LocSize) &&
7164 "Object size is larger than size of MemLoc");
7165 int CurArgOffset = VA.getLocMemOffset();
7166 // Objects are right-justified because AIX is big-endian.
7167 if (LocSize > ValSize)
7168 CurArgOffset += LocSize - ValSize;
7169 // Potential tail calls could cause overwriting of argument stack slots.
7170 const bool IsImmutable =
7171 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7172 (CallConv == CallingConv::Fast));
7173 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7174 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7175 SDValue ArgValue =
7176 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7177
7178 // While the ABI specifies the argument type is (sign or zero) extended
7179 // out to register width, not all code is compliant. We truncate and
7180 // re-extend to be more forgiving of these callers when the argument type
7181 // is smaller than register width.
7182 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7183 ValVT.isInteger() &&
7184 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7185 // It is possible to have either real integer values
7186 // or integers that were not originally integers.
7187 // In the latter case, these could have come from structs,
7188 // and these integers would not have an extend on the parameter.
7189 // Since these types of integers do not have an extend specified
7190 // in the first place, the type of extend that we do should not matter.
7191 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7192 ? MVT::i8
7193 : ArgVT;
7194 SDValue ArgValueTrunc =
7195 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7196 SDValue ArgValueExt =
7197 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7198 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7199 InVals.push_back(ArgValueExt);
7200 } else {
7201 InVals.push_back(ArgValue);
7202 }
7203 };
7204
7205 // Vector arguments to VaArg functions are passed both on the stack and
7206 // in any available GPRs. Load the value from the stack and add the GPRs
7207 // as live ins.
7208 if (VA.isMemLoc() && VA.needsCustom()) {
7209 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7210 assert(isVarArg && "Only use custom memloc for vararg.");
7211 // Remember the ValNo of the custom MemLoc so we can compare it to the
7212 // ValNo of the matching custom RegLocs.
7213 const unsigned OriginalValNo = VA.getValNo();
7214 (void)OriginalValNo;
7215
7216 auto HandleCustomVecRegLoc = [&]() {
7217 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7218 "Missing custom RegLoc.");
7219 VA = ArgLocs[I++];
7220 assert(VA.getValVT().isVector() &&
7221 "Unexpected Val type for custom RegLoc.");
7222 assert(VA.getValNo() == OriginalValNo &&
7223 "ValNo mismatch between custom MemLoc and RegLoc.");
7224 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7225 MF.addLiveIn(VA.getLocReg(),
7226 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7227 Subtarget.hasVSX()));
7228 };
7229
7230 HandleMemLoc();
7231 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7232 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7233 // R10.
7234 HandleCustomVecRegLoc();
7235 HandleCustomVecRegLoc();
7236
7237 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7238 // we passed the vector in R5, R6, R7 and R8.
7239 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7240 assert(!IsPPC64 &&
7241 "Only 2 custom RegLocs expected for 64-bit codegen.");
7242 HandleCustomVecRegLoc();
7243 HandleCustomVecRegLoc();
7244 }
7245
7246 continue;
7247 }
7248
7249 if (VA.isRegLoc()) {
7250 if (VA.getValVT().isScalarInteger())
7251 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7252 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7253 switch (VA.getValVT().SimpleTy) {
7254 default:
7255 report_fatal_error("Unhandled value type for argument.");
7256 case MVT::f32:
7257 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7258 break;
7259 case MVT::f64:
7260 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7261 break;
7262 }
7263 } else if (VA.getValVT().isVector()) {
7264 switch (VA.getValVT().SimpleTy) {
7265 default:
7266 report_fatal_error("Unhandled value type for argument.");
7267 case MVT::v16i8:
7268 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7269 break;
7270 case MVT::v8i16:
7271 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7272 break;
7273 case MVT::v4i32:
7274 case MVT::v2i64:
7275 case MVT::v1i128:
7276 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7277 break;
7278 case MVT::v4f32:
7279 case MVT::v2f64:
7280 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7281 break;
7282 }
7283 }
7284 }
7285
7286 if (Flags.isByVal() && VA.isMemLoc()) {
7287 const unsigned Size =
7288 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7289 PtrByteSize);
7290 const int FI = MF.getFrameInfo().CreateFixedObject(
7291 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7292 /* IsAliased */ true);
7293 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7294 InVals.push_back(FIN);
7295
7296 continue;
7297 }
7298
7299 if (Flags.isByVal()) {
7300 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7301
7302 const MCPhysReg ArgReg = VA.getLocReg();
7303 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7304
7305 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7306 const int FI = MF.getFrameInfo().CreateFixedObject(
7307 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7308 /* IsAliased */ true);
7309 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7310 InVals.push_back(FIN);
7311
7312 // Add live ins for all the RegLocs for the same ByVal.
7313 const TargetRegisterClass *RegClass =
7314 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7315
7316 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7317 unsigned Offset) {
7318 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7319 // Since the caller side has left-justified the aggregate in the
7320 // register, we can simply store the entire register into the stack
7321 // slot.
7322 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7323 // The store to the fixed stack object is needed because accessing a
7324 // field of the ByVal will use a GEP and load. Ideally we will optimize
7325 // to extracting the value from the register directly, and elide the
7326 // stores when the argument's address is not taken, but that will need
7327 // to be future work.
7328 SDValue Store = DAG.getStore(
7329 CopyFrom.getValue(1), dl, CopyFrom,
7330 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7331 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7332
7333 MemOps.push_back(Store);
7334 };
7335
7336 unsigned Offset = 0;
7337 HandleRegLoc(VA.getLocReg(), Offset);
7338 Offset += PtrByteSize;
7339 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7340 Offset += PtrByteSize) {
7341 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7342 "RegLocs should be for ByVal argument.");
7343
7344 const CCValAssign RL = ArgLocs[I++];
7345 HandleRegLoc(RL.getLocReg(), Offset);
7346 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7347 }
7348
7349 if (Offset != StackSize) {
7350 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7351 "Expected MemLoc for remaining bytes.");
7352 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7353 // Consume the MemLoc. The InVal has already been emitted, so nothing
7354 // more needs to be done.
7355 ++I;
7356 }
7357
7358 continue;
7359 }
7360
7361 if (VA.isRegLoc() && !VA.needsCustom()) {
7362 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7363 Register VReg =
7364 MF.addLiveIn(VA.getLocReg(),
7365 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7366 Subtarget.hasVSX()));
7367 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7368 if (ValVT.isScalarInteger() &&
7369 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7370 ArgValue =
7371 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7372 }
7373 InVals.push_back(ArgValue);
7374 continue;
7375 }
7376 if (VA.isMemLoc()) {
7377 HandleMemLoc();
7378 continue;
7379 }
7380 }
7381
7382 // On AIX a minimum of 8 words is saved to the parameter save area.
7383 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7384 // Area that is at least reserved in the caller of this function.
7385 unsigned CallerReservedArea = std::max<unsigned>(
7386 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7387
7388 // Set the size that is at least reserved in caller of this function. Tail
7389 // call optimized function's reserved stack space needs to be aligned so
7390 // that taking the difference between two stack areas will result in an
7391 // aligned stack.
7392 CallerReservedArea =
7393 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7394 FuncInfo->setMinReservedArea(CallerReservedArea);
7395
7396 if (isVarArg) {
7397 int VAListIndex = 0;
7398 // If any of the optional arguments are passed in a register, then the fixed
7399 // stack object we spill into is not immutable. Create a fixed stack object
7400 // that overlaps the remainder of the parameter save area.
7401 if (CCInfo.getStackSize() < (LinkageSize + MinParameterSaveArea)) {
7402 unsigned FixedStackSize =
7403 LinkageSize + MinParameterSaveArea - CCInfo.getStackSize();
7404 VAListIndex =
7405 MFI.CreateFixedObject(FixedStackSize, CCInfo.getStackSize(),
7406 /* IsImmutable */ false, /* IsAliased */ true);
7407 } else {
7408 // All the arguments passed through ellipses are on the stack. Create a
7409 // dummy fixed stack object the same size as a pointer since we don't
7410 // know the actual size.
7411 VAListIndex =
7412 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(),
7413 /* IsImmutable */ true, /* IsAliased */ true);
7414 }
7415
7416 FuncInfo->setVarArgsFrameIndex(VAListIndex);
7417 SDValue FIN = DAG.getFrameIndex(VAListIndex, PtrVT);
7418
7419 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7420 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7421
7422 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7423 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7424 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7425
7426 // The fixed integer arguments of a variadic function are stored to the
7427 // VarArgsFrameIndex on the stack so that they may be loaded by
7428 // dereferencing the result of va_next.
7429 for (unsigned
7430 GPRIndex = (CCInfo.getStackSize() - LinkageSize) / PtrByteSize,
7431 Offset = 0;
7432 GPRIndex < NumGPArgRegs; ++GPRIndex, Offset += PtrByteSize) {
7433
7434 const Register VReg =
7435 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7436 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7437
7438 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7439 MachinePointerInfo MPI =
7440 MachinePointerInfo::getFixedStack(MF, VAListIndex, Offset);
7441 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MPI);
7442 MemOps.push_back(Store);
7443 // Increment the address for the next argument to store.
7444 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7445 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7446 }
7447 }
7448
7449 if (!MemOps.empty())
7450 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7451
7452 return Chain;
7453}
7454
7455SDValue PPCTargetLowering::LowerCall_AIX(
7456 SDValue Chain, SDValue Callee, CallFlags CFlags,
7457 const SmallVectorImpl<ISD::OutputArg> &Outs,
7458 const SmallVectorImpl<SDValue> &OutVals,
7459 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7460 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7461 const CallBase *CB) const {
7462 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7463 // AIX ABI stack frame layout.
7464
7465 assert((CFlags.CallConv == CallingConv::C ||
7466 CFlags.CallConv == CallingConv::Cold ||
7467 CFlags.CallConv == CallingConv::Fast) &&
7468 "Unexpected calling convention!");
7469
7470 if (CFlags.IsPatchPoint)
7471 report_fatal_error("This call type is unimplemented on AIX.");
7472
7473 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7474
7475 MachineFunction &MF = DAG.getMachineFunction();
7476 SmallVector<CCValAssign, 16> ArgLocs;
7477 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7478 *DAG.getContext());
7479
7480 // Reserve space for the linkage save area (LSA) on the stack.
7481 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7482 // [SP][CR][LR][2 x reserved][TOC].
7483 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7484 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7485 const bool IsPPC64 = Subtarget.isPPC64();
7486 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7487 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7488 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7489 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7490
7491 // The prolog code of the callee may store up to 8 GPR argument registers to
7492 // the stack, allowing va_start to index over them in memory if the callee
7493 // is variadic.
7494 // Because we cannot tell if this is needed on the caller side, we have to
7495 // conservatively assume that it is needed. As such, make sure we have at
7496 // least enough stack space for the caller to store the 8 GPRs.
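// For example, on 64-bit AIX this gives a floor of 48 + 8 * 8 = 112 bytes
// (24 + 8 * 4 = 56 bytes on 32-bit AIX) even for calls with no stack
// arguments.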
7497 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7498 const unsigned NumBytes = std::max<unsigned>(
7499 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7500
7501 // Adjust the stack pointer for the new arguments...
7502 // These operations are automatically eliminated by the prolog/epilog pass.
7503 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7504 SDValue CallSeqStart = Chain;
7505
7506 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7507 SmallVector<SDValue, 8> MemOpChains;
7508
7509 // Set up a copy of the stack pointer for loading and storing any
7510 // arguments that may not fit in the registers available for argument
7511 // passing.
7512 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7513 : DAG.getRegister(PPC::R1, MVT::i32);
7514
7515 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7516 const unsigned ValNo = ArgLocs[I].getValNo();
7517 SDValue Arg = OutVals[ValNo];
7518 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7519
7520 if (Flags.isByVal()) {
7521 const unsigned ByValSize = Flags.getByValSize();
7522
7523 // Nothing to do for zero-sized ByVals on the caller side.
7524 if (!ByValSize) {
7525 ++I;
7526 continue;
7527 }
7528
7529 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7530 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7531 (LoadOffset != 0)
7532 ? DAG.getObjectPtrOffset(
7533 dl, Arg, TypeSize::getFixed(LoadOffset))
7534 : Arg,
7535 MachinePointerInfo(), VT);
7536 };
7537
7538 unsigned LoadOffset = 0;
7539
7540 // Initialize registers, which are fully occupied by the by-val argument.
7541 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7542 SDValue Load = GetLoad(PtrVT, LoadOffset);
7543 MemOpChains.push_back(Load.getValue(1));
7544 LoadOffset += PtrByteSize;
7545 const CCValAssign &ByValVA = ArgLocs[I++];
7546 assert(ByValVA.getValNo() == ValNo &&
7547 "Unexpected location for pass-by-value argument.");
7548 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7549 }
7550
7551 if (LoadOffset == ByValSize)
7552 continue;
7553
7554 // There must be one more loc to handle the remainder.
7555 assert(ArgLocs[I].getValNo() == ValNo &&
7556 "Expected additional location for by-value argument.");
7557
7558 if (ArgLocs[I].isMemLoc()) {
7559 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7560 const CCValAssign &ByValVA = ArgLocs[I++];
7561 ISD::ArgFlagsTy MemcpyFlags = Flags;
7562 // Only memcpy the bytes that don't pass in register.
7563 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7564 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7565 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7566 dl, Arg, TypeSize::getFixed(LoadOffset))
7567 : Arg,
7568 DAG.getObjectPtrOffset(
7569 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7570 CallSeqStart, MemcpyFlags, DAG, dl);
7571 continue;
7572 }
7573
7574 // Initialize the final register residue.
7575 // Any residue that occupies the final by-val arg register must be
7576 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7577 // larger than the ByValSize. For example, a 7-byte by-val arg requires 4-,
7578 // 2- and 1-byte loads.
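// With 64-bit pointers those three loads end up shifted left by 32, 16 and
// 8 bits respectively, so the 7 residue bytes occupy the high-order bytes
// of the register and the low-order byte stays zero.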
7579 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7580 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7581 "Unexpected register residue for by-value argument.");
7582 SDValue ResidueVal;
7583 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7584 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7585 const MVT VT =
7586 N == 1 ? MVT::i8
7587 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7588 SDValue Load = GetLoad(VT, LoadOffset);
7589 MemOpChains.push_back(Load.getValue(1));
7590 LoadOffset += N;
7591 Bytes += N;
7592
7593 // By-val arguments are passed left-justified in a register.
7594 // Every load here needs to be shifted, otherwise a full register load
7595 // should have been used.
7596 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7597 "Unexpected load emitted during handling of pass-by-value "
7598 "argument.");
7599 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7600 EVT ShiftAmountTy =
7601 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7602 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7603 SDValue ShiftedLoad =
7604 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7605 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7606 ShiftedLoad)
7607 : ShiftedLoad;
7608 }
7609
7610 const CCValAssign &ByValVA = ArgLocs[I++];
7611 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7612 continue;
7613 }
7614
7615 CCValAssign &VA = ArgLocs[I++];
7616 const MVT LocVT = VA.getLocVT();
7617 const MVT ValVT = VA.getValVT();
7618
7619 switch (VA.getLocInfo()) {
7620 default:
7621 report_fatal_error("Unexpected argument extension type.");
7622 case CCValAssign::Full:
7623 break;
7624 case CCValAssign::ZExt:
7625 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7626 break;
7627 case CCValAssign::SExt:
7628 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7629 break;
7630 }
7631
7632 if (VA.isRegLoc() && !VA.needsCustom()) {
7633 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7634 continue;
7635 }
7636
7637 // Vector arguments passed to VarArg functions need custom handling when
7638 // they are passed (at least partially) in GPRs.
7639 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7640 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7641 // Store value to its stack slot.
7642 SDValue PtrOff =
7643 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7644 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7645 SDValue Store =
7646 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7647 MemOpChains.push_back(Store);
7648 const unsigned OriginalValNo = VA.getValNo();
7649 // Then load the GPRs from the stack
7650 unsigned LoadOffset = 0;
7651 auto HandleCustomVecRegLoc = [&]() {
7652 assert(I != E && "Unexpected end of CCvalAssigns.");
7653 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7654 "Expected custom RegLoc.");
7655 CCValAssign RegVA = ArgLocs[I++];
7656 assert(RegVA.getValNo() == OriginalValNo &&
7657 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7658 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7659 DAG.getConstant(LoadOffset, dl, PtrVT));
7660 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7661 MemOpChains.push_back(Load.getValue(1));
7662 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7663 LoadOffset += PtrByteSize;
7664 };
7665
7666 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7667 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7668 // R10.
7669 HandleCustomVecRegLoc();
7670 HandleCustomVecRegLoc();
7671
7672 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7673 ArgLocs[I].getValNo() == OriginalValNo) {
7674 assert(!IsPPC64 &&
7675 "Only 2 custom RegLocs expected for 64-bit codegen.");
7676 HandleCustomVecRegLoc();
7677 HandleCustomVecRegLoc();
7678 }
7679
7680 continue;
7681 }
7682
7683 if (VA.isMemLoc()) {
7684 SDValue PtrOff =
7685 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7686 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7687 MemOpChains.push_back(
7688 DAG.getStore(Chain, dl, Arg, PtrOff,
7689 MachinePointerInfo::getStack(MF, VA.getLocMemOffset()),
7690 Subtarget.getFrameLowering()->getStackAlign()));
7691
7692 continue;
7693 }
7694
7695 if (!ValVT.isFloatingPoint())
7696 report_fatal_error(
7697 "Unexpected register handling for calling convention.");
7698
7699 // Custom handling is used for GPR initializations for vararg float
7700 // arguments.
7701 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7702 LocVT.isInteger() &&
7703 "Custom register handling only expected for VarArg.");
7704
7705 SDValue ArgAsInt =
7706 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7707
7708 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7709 // f32 in 32-bit GPR
7710 // f64 in 64-bit GPR
7711 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7712 else if (Arg.getValueType().getFixedSizeInBits() <
7713 LocVT.getFixedSizeInBits())
7714 // f32 in 64-bit GPR.
7715 RegsToPass.push_back(std::make_pair(
7716 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7717 else {
7718 // f64 in two 32-bit GPRs
7719 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7720 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7721 "Unexpected custom register for argument!");
7722 CCValAssign &GPR1 = VA;
7723 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7724 DAG.getConstant(32, dl, MVT::i8));
7725 RegsToPass.push_back(std::make_pair(
7726 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7727
7728 if (I != E) {
7729 // If only 1 GPR was available, there will only be one custom GPR and
7730 // the argument will also pass in memory.
7731 CCValAssign &PeekArg = ArgLocs[I];
7732 if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7733 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7734 CCValAssign &GPR2 = ArgLocs[I++];
7735 RegsToPass.push_back(std::make_pair(
7736 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7737 }
7738 }
7739 }
7740 }
7741
7742 if (!MemOpChains.empty())
7743 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7744
7745 // For indirect calls, we need to save the TOC base to the stack for
7746 // restoration after the call.
7747 if (CFlags.IsIndirect) {
7748 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7749 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7750 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7751 const MVT PtrVT = Subtarget.getScalarIntVT();
7752 const unsigned TOCSaveOffset =
7753 Subtarget.getFrameLowering()->getTOCSaveOffset();
7754
7755 setUsesTOCBasePtr(DAG);
7756 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7757 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7758 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7759 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7760 Chain = DAG.getStore(
7761 Val.getValue(1), dl, Val, AddPtr,
7762 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7763 }
7764
7765 // Build a sequence of copy-to-reg nodes chained together with token chain
7766 // and flag operands which copy the outgoing args into the appropriate regs.
7767 SDValue InGlue;
7768 for (auto Reg : RegsToPass) {
7769 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7770 InGlue = Chain.getValue(1);
7771 }
7772
7773 const int SPDiff = 0;
7774 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7775 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7776}
7777
7778bool
7779PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7780 MachineFunction &MF, bool isVarArg,
7781 const SmallVectorImpl<ISD::OutputArg> &Outs,
7782 LLVMContext &Context,
7783 const Type *RetTy) const {
7784 SmallVector<CCValAssign, 16> RVLocs;
7785 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7786 return CCInfo.CheckReturn(
7787 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7788 ? RetCC_PPC_Cold
7789 : RetCC_PPC);
7790}
7791
7792SDValue
7793PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7794 bool isVarArg,
7795 const SmallVectorImpl<ISD::OutputArg> &Outs,
7796 const SmallVectorImpl<SDValue> &OutVals,
7797 const SDLoc &dl, SelectionDAG &DAG) const {
7798 SmallVector<CCValAssign, 16> RVLocs;
7799 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7800 *DAG.getContext());
7801 CCInfo.AnalyzeReturn(Outs,
7802 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7803 ? RetCC_PPC_Cold
7804 : RetCC_PPC);
7805
7806 SDValue Glue;
7807 SmallVector<SDValue, 4> RetOps(1, Chain);
7808
7809 // Copy the result values into the output registers.
7810 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7811 CCValAssign &VA = RVLocs[i];
7812 assert(VA.isRegLoc() && "Can only return in registers!");
7813
7814 SDValue Arg = OutVals[RealResIdx];
7815
7816 switch (VA.getLocInfo()) {
7817 default: llvm_unreachable("Unknown loc info!");
7818 case CCValAssign::Full: break;
7819 case CCValAssign::AExt:
7820 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7821 break;
7822 case CCValAssign::ZExt:
7823 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7824 break;
7825 case CCValAssign::SExt:
7826 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7827 break;
7828 }
7829 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7830 bool isLittleEndian = Subtarget.isLittleEndian();
7831 // Legalize ret f64 -> ret 2 x i32.
7832 SDValue SVal =
7833 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7834 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7835 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7836 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7837 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7838 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7839 Glue = Chain.getValue(1);
7840 VA = RVLocs[++i]; // skip ahead to next loc
7841 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7842 } else
7843 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7844 Glue = Chain.getValue(1);
7845 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7846 }
7847
7848 RetOps[0] = Chain; // Update chain.
7849
7850 // Add the glue if we have it.
7851 if (Glue.getNode())
7852 RetOps.push_back(Glue);
7853
7854 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7855}
7856
7857SDValue
7858PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7859 SelectionDAG &DAG) const {
7860 SDLoc dl(Op);
7861
7862 // Get the correct type for integers.
7863 EVT IntVT = Op.getValueType();
7864
7865 // Get the inputs.
7866 SDValue Chain = Op.getOperand(0);
7867 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7868 // Build a DYNAREAOFFSET node.
7869 SDValue Ops[2] = {Chain, FPSIdx};
7870 SDVTList VTs = DAG.getVTList(IntVT);
7871 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7872}
7873
7874SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7875 SelectionDAG &DAG) const {
7876 // When we pop the dynamic allocation we need to restore the SP link.
7877 SDLoc dl(Op);
7878
7879 // Get the correct type for pointers.
7880 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7881
7882 // Construct the stack pointer operand.
7883 bool isPPC64 = Subtarget.isPPC64();
7884 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7885 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7886
7887 // Get the operands for the STACKRESTORE.
7888 SDValue Chain = Op.getOperand(0);
7889 SDValue SaveSP = Op.getOperand(1);
7890
7891 // Load the old link SP.
7892 SDValue LoadLinkSP =
7893 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7894
7895 // Restore the stack pointer.
7896 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7897
7898 // Store the old link SP.
7899 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7900}
7901
7902SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7903 MachineFunction &MF = DAG.getMachineFunction();
7904 bool isPPC64 = Subtarget.isPPC64();
7905 EVT PtrVT = getPointerTy(MF.getDataLayout());
7906
7907 // Get the current return address save index. The users of this index
7908 // primarily lower accesses to the return address (e.g. RETURNADDR).
7909 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7910 int RASI = FI->getReturnAddrSaveIndex();
7911
7912 // If the return address save index hasn't been defined yet.
7913 if (!RASI) {
7914 // Find out the fixed offset of the return address save area.
7915 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7916 // Allocate the frame index for the return address save area.
7917 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7918 // Save the result.
7919 FI->setReturnAddrSaveIndex(RASI);
7920 }
7921 return DAG.getFrameIndex(RASI, PtrVT);
7922}
7923
7924SDValue
7925PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7926 MachineFunction &MF = DAG.getMachineFunction();
7927 bool isPPC64 = Subtarget.isPPC64();
7928 EVT PtrVT = getPointerTy(MF.getDataLayout());
7929
7930 // Get current frame pointer save index. The users of this index will be
7931 // primarily DYNALLOC instructions.
7932 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7933 int FPSI = FI->getFramePointerSaveIndex();
7934
7935 // If the frame pointer save index hasn't been defined yet.
7936 if (!FPSI) {
7937 // Find out the fixed offset of the frame pointer save area.
7938 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7939 // Allocate the frame index for the frame pointer save area.
7940 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7941 // Save the result.
7942 FI->setFramePointerSaveIndex(FPSI);
7943 }
7944 return DAG.getFrameIndex(FPSI, PtrVT);
7945}
7946
7947SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7948 SelectionDAG &DAG) const {
7949 MachineFunction &MF = DAG.getMachineFunction();
7950 // Get the inputs.
7951 SDValue Chain = Op.getOperand(0);
7952 SDValue Size = Op.getOperand(1);
7953 SDLoc dl(Op);
7954
7955 // Get the correct type for pointers.
7956 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7957 // Negate the size.
7958 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7959 DAG.getConstant(0, dl, PtrVT), Size);
7960 // Construct a node for the frame pointer save index.
7961 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7962 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7963 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7964 if (hasInlineStackProbe(MF))
7965 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7966 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7967}
7968
7969SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7970 SelectionDAG &DAG) const {
7971 MachineFunction &MF = DAG.getMachineFunction();
7972
7973 bool isPPC64 = Subtarget.isPPC64();
7974 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7975
7976 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7977 return DAG.getFrameIndex(FI, PtrVT);
7978}
7979
7980SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7981 SelectionDAG &DAG) const {
7982 SDLoc DL(Op);
7983 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7984 DAG.getVTList(MVT::i32, MVT::Other),
7985 Op.getOperand(0), Op.getOperand(1));
7986}
7987
7988SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7989 SelectionDAG &DAG) const {
7990 SDLoc DL(Op);
7991 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7992 Op.getOperand(0), Op.getOperand(1));
7993}
7994
7995SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7996 if (Op.getValueType().isVector())
7997 return LowerVectorLoad(Op, DAG);
7998
7999 assert(Op.getValueType() == MVT::i1 &&
8000 "Custom lowering only for i1 loads");
8001
8002 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8003
8004 SDLoc dl(Op);
8005 LoadSDNode *LD = cast<LoadSDNode>(Op);
8006
8007 SDValue Chain = LD->getChain();
8008 SDValue BasePtr = LD->getBasePtr();
8009 MachineMemOperand *MMO = LD->getMemOperand();
8010
8011 SDValue NewLD =
8012 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8013 BasePtr, MVT::i8, MMO);
8014 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8015
8016 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8017 return DAG.getMergeValues(Ops, dl);
8018}
8019
8020SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8021 if (Op.getOperand(1).getValueType().isVector())
8022 return LowerVectorStore(Op, DAG);
8023
8024 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8025 "Custom lowering only for i1 stores");
8026
8027 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8028
8029 SDLoc dl(Op);
8030 StoreSDNode *ST = cast<StoreSDNode>(Op);
8031
8032 SDValue Chain = ST->getChain();
8033 SDValue BasePtr = ST->getBasePtr();
8034 SDValue Value = ST->getValue();
8035 MachineMemOperand *MMO = ST->getMemOperand();
8036
8037 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8038 Value);
8039 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8040}
8041
8042// FIXME: Remove this once the ANDI glue bug is fixed:
8043SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8044 assert(Op.getValueType() == MVT::i1 &&
8045 "Custom lowering only for i1 results");
8046
8047 SDLoc DL(Op);
8048 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8049}
8050
8051SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8052 SelectionDAG &DAG) const {
8053
8054 // Implements a vector truncate that fits in a vector register as a shuffle.
8055 // We want to legalize vector truncates down to where the source fits in
8056 // a vector register (and target is therefore smaller than vector register
8057 // size). At that point legalization will try to custom lower the sub-legal
8058 // result and get here - where we can contain the truncate as a single target
8059 // operation.
8060
8061 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8062 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8063 //
8064 // We will implement it for big-endian ordering as this (where x denotes
8065 // undefined):
8066 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8067 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8068 //
8069 // The same operation in little-endian ordering will be:
8070 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8071 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8072
8073 EVT TrgVT = Op.getValueType();
8074 assert(TrgVT.isVector() && "Vector type expected.");
8075 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8076 EVT EltVT = TrgVT.getVectorElementType();
8077 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8078 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8080 return SDValue();
8081
8082 SDValue N1 = Op.getOperand(0);
8083 EVT SrcVT = N1.getValueType();
8084 unsigned SrcSize = SrcVT.getSizeInBits();
8085 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8088 return SDValue();
8089 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8090 return SDValue();
8091
8092 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8093 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8094
8095 SDLoc DL(Op);
8096 SDValue Op1, Op2;
8097 if (SrcSize == 256) {
8098 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8099 EVT SplitVT =
8101 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8102 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8103 DAG.getConstant(0, DL, VecIdxTy));
8104 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8105 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8106 }
8107 else {
8108 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8109 Op2 = DAG.getUNDEF(WideVT);
8110 }
8111
8112 // First list the elements we want to keep.
8113 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8114 SmallVector<int, 16> ShuffV;
8115 if (Subtarget.isLittleEndian())
8116 for (unsigned i = 0; i < TrgNumElts; ++i)
8117 ShuffV.push_back(i * SizeMult);
8118 else
8119 for (unsigned i = 1; i <= TrgNumElts; ++i)
8120 ShuffV.push_back(i * SizeMult - 1);
8121
8122 // Populate the remaining elements with undefs.
8123 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8124 // ShuffV.push_back(i + WideNumElts);
8125 ShuffV.push_back(WideNumElts + 1);
8126
8127 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8128 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8129 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8130}
8131
8132/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
8133/// when possible.
8134SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8135 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8136 EVT ResVT = Op.getValueType();
8137 EVT CmpVT = Op.getOperand(0).getValueType();
8138 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8139 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8140 SDLoc dl(Op);
8141
8142 // Without power9-vector, we don't have a native instruction for f128
8143 // comparison, so transform the select_cc to go through a setcc libcall:
8144 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8145 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8146 SDValue Z = DAG.getSetCC(
8147 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8148 LHS, RHS, CC);
8149 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8150 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8151 }
8152
8153 // Not FP, or using SPE? Not a fsel.
8154 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8155 Subtarget.hasSPE())
8156 return Op;
8157
8158 SDNodeFlags Flags = Op.getNode()->getFlags();
8159
8160 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8161 // presence of infinities.
8162 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8163 switch (CC) {
8164 default:
8165 break;
8166 case ISD::SETOGT:
8167 case ISD::SETGT:
8168 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8169 case ISD::SETOLT:
8170 case ISD::SETLT:
8171 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8172 }
8173 }
8174
8175 // We might be able to do better than this under some circumstances, but in
8176 // general, fsel-based lowering of select is a finite-math-only optimization.
8177 // For more information, see section F.3 of the 2.06 ISA specification.
8178 // (The ISA 3.0 xsmaxc/xsminc path above is not subject to this restriction.)
8179 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8180 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8181 ResVT == MVT::f128)
8182 return Op;
8183
8184 // If the RHS of the comparison is a 0.0, we don't need to do the
8185 // subtraction at all.
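 // Informally, PPCISD::FSEL(C, T, F) yields T when C >= 0.0 and F otherwise,
 // so e.g. select_cc(LHS, RHS, TV, FV, SETGE) maps to FSEL(LHS - RHS, TV, FV),
 // and the LT/GT cases below are reached by first swapping TV and FV.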
8186 SDValue Sel1;
8187 if (isFloatingPointZero(RHS))
8188 switch (CC) {
8189 default: break; // SETUO etc aren't handled by fsel.
8190 case ISD::SETNE:
8191 std::swap(TV, FV);
8192 [[fallthrough]];
8193 case ISD::SETEQ:
8194 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8195 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8196 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8197 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8198 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8199 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8200 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8201 case ISD::SETULT:
8202 case ISD::SETLT:
8203 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8204 [[fallthrough]];
8205 case ISD::SETOGE:
8206 case ISD::SETGE:
8207 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8208 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8209 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8210 case ISD::SETUGT:
8211 case ISD::SETGT:
8212 std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
8213 [[fallthrough]];
8214 case ISD::SETOLE:
8215 case ISD::SETLE:
8216 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8217 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8218 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8219 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8220 }
8221
8222 SDValue Cmp;
8223 switch (CC) {
8224 default: break; // SETUO etc aren't handled by fsel.
8225 case ISD::SETNE:
8226 std::swap(TV, FV);
8227 [[fallthrough]];
8228 case ISD::SETEQ:
8229 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8230 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8231 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8232 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8233 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8234 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8235 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8236 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8237 case ISD::SETULT:
8238 case ISD::SETLT:
8239 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8240 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8241 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8242 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8243 case ISD::SETOGE:
8244 case ISD::SETGE:
8245 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8246 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8247 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8248 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8249 case ISD::SETUGT:
8250 case ISD::SETGT:
8251 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8252 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8253 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8254 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8255 case ISD::SETOLE:
8256 case ISD::SETLE:
8257 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8258 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8259 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8260 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8261 }
8262 return Op;
8263}
8264
8265static unsigned getPPCStrictOpcode(unsigned Opc) {
8266 switch (Opc) {
8267 default:
8268 llvm_unreachable("No strict version of this opcode!");
8269 case PPCISD::FCTIDZ:
8270 return PPCISD::STRICT_FCTIDZ;
8271 case PPCISD::FCTIWZ:
8272 return PPCISD::STRICT_FCTIWZ;
8273 case PPCISD::FCTIDUZ:
8274 return PPCISD::STRICT_FCTIDUZ;
8275 case PPCISD::FCTIWUZ:
8276 return PPCISD::STRICT_FCTIWUZ;
8277 case PPCISD::FCFID:
8278 return PPCISD::STRICT_FCFID;
8279 case PPCISD::FCFIDU:
8280 return PPCISD::STRICT_FCFIDU;
8281 case PPCISD::FCFIDS:
8282 return PPCISD::STRICT_FCFIDS;
8283 case PPCISD::FCFIDUS:
8284 return PPCISD::STRICT_FCFIDUS;
8285 }
8286}
8287
8288static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8289 const PPCSubtarget &Subtarget) {
8290 SDLoc dl(Op);
8291 bool IsStrict = Op->isStrictFPOpcode();
8292 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8293 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8294
8295 // TODO: Any other flags to propagate?
8296 SDNodeFlags Flags;
8297 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8298
8299 // For strict nodes, source is the second operand.
8300 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8301 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8302 MVT DestTy = Op.getSimpleValueType();
8303 assert(Src.getValueType().isFloatingPoint() &&
8304 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8305 DestTy == MVT::i64) &&
8306 "Invalid FP_TO_INT types");
8307 if (Src.getValueType() == MVT::f32) {
8308 if (IsStrict) {
8309 Src =
8310 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8311 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8312 Chain = Src.getValue(1);
8313 } else
8314 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8315 }
8316 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8317 DestTy = Subtarget.getScalarIntVT();
8318 unsigned Opc = ISD::DELETED_NODE;
8319 switch (DestTy.SimpleTy) {
8320 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8321 case MVT::i32:
8322 Opc = IsSigned ? PPCISD::FCTIWZ
8323 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8324 break;
8325 case MVT::i64:
8326 assert((IsSigned || Subtarget.hasFPCVT()) &&
8327 "i64 FP_TO_UINT is supported only with FPCVT");
8328 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8329 }
8330 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8331 SDValue Conv;
8332 if (IsStrict) {
8333 Opc = getPPCStrictOpcode(Opc);
8334 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8335 Flags);
8336 } else {
8337 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8338 }
8339 return Conv;
8340}
8341
8342void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8343 SelectionDAG &DAG,
8344 const SDLoc &dl) const {
8345 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8346 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8347 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8348 bool IsStrict = Op->isStrictFPOpcode();
8349
8350 // Convert the FP value to an int value through memory.
8351 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8352 (IsSigned || Subtarget.hasFPCVT());
8353 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8354 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8355 MachinePointerInfo MPI =
8356 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8357
8358 // Emit a store to the stack slot.
8359 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8360 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8361 if (i32Stack) {
8362 MachineFunction &MF = DAG.getMachineFunction();
8363 Alignment = Align(4);
8364 MachineMemOperand *MMO =
8365 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8366 SDValue Ops[] = { Chain, Tmp, FIPtr };
8367 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8368 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8369 } else
8370 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8371
8372 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8373 // add in a bias on big endian.
8374 if (Op.getValueType() == MVT::i32 && !i32Stack &&
8375 !Subtarget.isLittleEndian()) {
8376 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8377 DAG.getConstant(4, dl, FIPtr.getValueType()));
8378 MPI = MPI.getWithOffset(4);
8379 }
8380
8381 RLI.Chain = Chain;
8382 RLI.Ptr = FIPtr;
8383 RLI.MPI = MPI;
8384 RLI.Alignment = Alignment;
8385}
8386
8387/// Custom lowers floating point to integer conversions to use
8388/// the direct move instructions available in ISA 2.07 to avoid the
8389/// need for load/store combinations.
8390SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8391 SelectionDAG &DAG,
8392 const SDLoc &dl) const {
8393 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8394 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8395 if (Op->isStrictFPOpcode())
8396 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8397 else
8398 return Mov;
8399}
8400
8401SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8402 const SDLoc &dl) const {
8403 bool IsStrict = Op->isStrictFPOpcode();
8404 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8405 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8406 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8407 EVT SrcVT = Src.getValueType();
8408 EVT DstVT = Op.getValueType();
8409
8410 // FP to INT conversions are legal for f128.
8411 if (SrcVT == MVT::f128)
8412 return Subtarget.hasP9Vector() ? Op : SDValue();
8413
8414 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8415 // PPC (the libcall is not available).
8416 if (SrcVT == MVT::ppcf128) {
8417 if (DstVT == MVT::i32) {
8418 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8419 // set other fast-math flags to FP operations in both strict and
8420 // non-strict cases. (FP_TO_SINT, FSUB)
8421 SDNodeFlags Flags;
8422 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8423
8424 if (IsSigned) {
8425 SDValue Lo, Hi;
8426 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8427
8428 // Add the two halves of the long double in round-to-zero mode, and use
8429 // a smaller FP_TO_SINT.
8430 if (IsStrict) {
8431 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8432 DAG.getVTList(MVT::f64, MVT::Other),
8433 {Op.getOperand(0), Lo, Hi}, Flags);
8434 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8435 DAG.getVTList(MVT::i32, MVT::Other),
8436 {Res.getValue(1), Res}, Flags);
8437 } else {
8438 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8439 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8440 }
8441 } else {
8442 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8443 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8444 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8445 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8446 if (IsStrict) {
8447 // Sel = Src < 0x80000000
8448 // FltOfs = select Sel, 0.0, 0x80000000
8449 // IntOfs = select Sel, 0, 0x80000000
8450 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
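 // A rough numeric check of the sequence above: for Src = 3.0e9, Sel is
 // false, so FltOfs = 2^31 and IntOfs = 0x80000000; fp_to_sint(3.0e9 - 2^31)
 // gives 852516352, and 852516352 ^ 0x80000000 is 3000000000, the expected
 // unsigned result.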
8451 SDValue Chain = Op.getOperand(0);
8452 EVT SetCCVT =
8453 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8454 EVT DstSetCCVT =
8455 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8456 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8457 Chain, true);
8458 Chain = Sel.getValue(1);
8459
8460 SDValue FltOfs = DAG.getSelect(
8461 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8462 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8463
8464 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8465 DAG.getVTList(SrcVT, MVT::Other),
8466 {Chain, Src, FltOfs}, Flags);
8467 Chain = Val.getValue(1);
8468 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8469 DAG.getVTList(DstVT, MVT::Other),
8470 {Chain, Val}, Flags);
8471 Chain = SInt.getValue(1);
8472 SDValue IntOfs = DAG.getSelect(
8473 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8474 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8475 return DAG.getMergeValues({Result, Chain}, dl);
8476 } else {
8477 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8478 // FIXME: generated code sucks.
8479 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8480 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8481 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8482 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8483 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8484 }
8485 }
8486 }
8487
8488 return SDValue();
8489 }
8490
8491 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8492 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8493
8494 ReuseLoadInfo RLI;
8495 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8496
8497 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8498 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8499}
8500
8501// We're trying to insert a regular store, S, and then a load, L. If the
8502// incoming value, O, is a load, we might just be able to have our load use the
8503// address used by O. However, we don't know if anything else will store to
8504// that address before we can load from it. To prevent this situation, we need
8505// to insert our load, L, into the chain as a peer of O. To do this, we give L
8506// the same chain operand as O, we create a token factor from the chain results
8507// of O and L, and we replace all uses of O's chain result with that token
8508// factor (this last part is handled by makeEquivalentMemoryOrdering).
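// A small informal sketch of the intended DAG shape:
//   O  = load @addr, ch            ; the incoming load
//   L  = load @addr, ch            ; our new load, sharing O's chain operand
//   TF = TokenFactor O.chain, L.chain
// with all former uses of O's chain result rewired to TF, so no store can be
// scheduled between O and L.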
8509bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8510 ReuseLoadInfo &RLI,
8511 SelectionDAG &DAG,
8512 ISD::LoadExtType ET) const {
8513 // Conservatively skip reusing for constrained FP nodes.
8514 if (Op->isStrictFPOpcode())
8515 return false;
8516
8517 SDLoc dl(Op);
8518 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8519 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8520 if (ET == ISD::NON_EXTLOAD &&
8521 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8522 isOperationLegalOrCustom(Op.getOpcode(),
8523 Op.getOperand(0).getValueType())) {
8524
8525 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8526 return true;
8527 }
8528
8529 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8530 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8531 LD->isNonTemporal())
8532 return false;
8533 if (LD->getMemoryVT() != MemVT)
8534 return false;
8535
8536 // If the result of the load is an illegal type, then we can't build a
8537 // valid chain for reuse, since the legalised loads and the token factor node
8538 // that ties them together use a different output chain than the
8539 // illegal load.
8540 if (!isTypeLegal(LD->getValueType(0)))
8541 return false;
8542
8543 RLI.Ptr = LD->getBasePtr();
8544 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8545 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8546 "Non-pre-inc AM on PPC?");
8547 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8548 LD->getOffset());
8549 }
8550
8551 RLI.Chain = LD->getChain();
8552 RLI.MPI = LD->getPointerInfo();
8553 RLI.IsDereferenceable = LD->isDereferenceable();
8554 RLI.IsInvariant = LD->isInvariant();
8555 RLI.Alignment = LD->getAlign();
8556 RLI.AAInfo = LD->getAAInfo();
8557 RLI.Ranges = LD->getRanges();
8558
8559 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8560 return true;
8561}
8562
8563/// Analyze the profitability of a direct move:
8564/// prefer a float load over an int load plus a direct move
8565/// when the loaded integer value has no integer uses.
8566bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8567 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8568 if (Origin->getOpcode() != ISD::LOAD)
8569 return true;
8570
8571 // If there is no LXSIBZX/LXSIHZX, like Power8,
8572 // prefer direct move if the memory size is 1 or 2 bytes.
8573 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8574 if (!Subtarget.hasP9Vector() &&
8575 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8576 return true;
8577
8578 for (SDUse &Use : Origin->uses()) {
8579
8580 // Only look at the users of the loaded value.
8581 if (Use.getResNo() != 0)
8582 continue;
8583
8584 SDNode *User = Use.getUser();
8585 if (User->getOpcode() != ISD::SINT_TO_FP &&
8586 User->getOpcode() != ISD::UINT_TO_FP &&
8587 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8588 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8589 return true;
8590 }
8591
8592 return false;
8593}
8594
8595static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8596 const PPCSubtarget &Subtarget,
8597 SDValue Chain = SDValue()) {
8598 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8599 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8600 SDLoc dl(Op);
8601
8602 // TODO: Any other flags to propagate?
8603 SDNodeFlags Flags;
8604 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8605
8606 // If we have FCFIDS, then use it when converting to single-precision.
8607 // Otherwise, convert to double-precision and then round.
8608 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8609 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8610 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8611 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8612 if (Op->isStrictFPOpcode()) {
8613 if (!Chain)
8614 Chain = Op.getOperand(0);
8615 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8616 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8617 } else
8618 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8619}
8620
8621/// Custom lowers integer to floating point conversions to use
8622/// the direct move instructions available in ISA 2.07 to avoid the
8623/// need for load/store combinations.
8624SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8625 SelectionDAG &DAG,
8626 const SDLoc &dl) const {
8627 assert((Op.getValueType() == MVT::f32 ||
8628 Op.getValueType() == MVT::f64) &&
8629 "Invalid floating point type as target of conversion");
8630 assert(Subtarget.hasFPCVT() &&
8631 "Int to FP conversions with direct moves require FPCVT");
8632 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8633 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8634 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8635 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8636 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8637 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8638 return convertIntToFP(Op, Mov, DAG, Subtarget);
8639}
8640
8641static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8642
8643 EVT VecVT = Vec.getValueType();
8644 assert(VecVT.isVector() && "Expected a vector type.");
8645 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8646
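 // For example, a v4i8 input (NumConcat = 4 below) gets concatenated with
 // three v4i8 undef vectors to form a single full-width v16i8 value.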
8647 EVT EltVT = VecVT.getVectorElementType();
8648 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8649 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8650
8651 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8652 SmallVector<SDValue, 16> Ops(NumConcat);
8653 Ops[0] = Vec;
8654 SDValue UndefVec = DAG.getUNDEF(VecVT);
8655 for (unsigned i = 1; i < NumConcat; ++i)
8656 Ops[i] = UndefVec;
8657
8658 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8659}
8660
8661SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8662 const SDLoc &dl) const {
8663 bool IsStrict = Op->isStrictFPOpcode();
8664 unsigned Opc = Op.getOpcode();
8665 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8668 "Unexpected conversion type");
8669 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8670 "Supports conversions to v2f64/v4f32 only.");
8671
8672 // TODO: Any other flags to propagate?
8673 SDNodeFlags Flags;
8674 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8675
8676 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8677 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8678
8679 SDValue Wide = widenVec(DAG, Src, dl);
8680 EVT WideVT = Wide.getValueType();
8681 unsigned WideNumElts = WideVT.getVectorNumElements();
8682 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8683
8684 SmallVector<int, 16> ShuffV;
8685 for (unsigned i = 0; i < WideNumElts; ++i)
8686 ShuffV.push_back(i + WideNumElts);
8687
8688 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8689 int SaveElts = FourEltRes ? 4 : 2;
8690 if (Subtarget.isLittleEndian())
8691 for (int i = 0; i < SaveElts; i++)
8692 ShuffV[i * Stride] = i;
8693 else
8694 for (int i = 1; i <= SaveElts; i++)
8695 ShuffV[i * Stride - 1] = i - 1;
8696
8697 SDValue ShuffleSrc2 =
8698 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8699 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8700
8701 SDValue Extend;
8702 if (SignedConv) {
8703 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8704 EVT ExtVT = Src.getValueType();
8705 if (Subtarget.hasP9Altivec())
8706 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8707 IntermediateVT.getVectorNumElements());
8708
8709 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8710 DAG.getValueType(ExtVT));
8711 } else
8712 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8713
8714 if (IsStrict)
8715 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8716 {Op.getOperand(0), Extend}, Flags);
8717
8718 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8719}
8720
8721SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8722 SelectionDAG &DAG) const {
8723 SDLoc dl(Op);
8724 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8725 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8726 bool IsStrict = Op->isStrictFPOpcode();
8727 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8728 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8729
8730 // TODO: Any other flags to propagate?
8731 SDNodeFlags Flags;
8732 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8733
8734 EVT InVT = Src.getValueType();
8735 EVT OutVT = Op.getValueType();
8736 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8737 isOperationCustom(Op.getOpcode(), InVT))
8738 return LowerINT_TO_FPVector(Op, DAG, dl);
8739
8740 // Conversions to f128 are legal.
8741 if (Op.getValueType() == MVT::f128)
8742 return Subtarget.hasP9Vector() ? Op : SDValue();
8743
8744 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8745 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8746 return SDValue();
8747
8748 if (Src.getValueType() == MVT::i1) {
8749 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8750 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8751 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8752 if (IsStrict)
8753 return DAG.getMergeValues({Sel, Chain}, dl);
8754 else
8755 return Sel;
8756 }
8757
8758 // If we have direct moves, we can do all the conversion, skip the store/load
8759 // however, without FPCVT we can't do most conversions.
8760 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8761 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8762 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8763
8764 assert((IsSigned || Subtarget.hasFPCVT()) &&
8765 "UINT_TO_FP is supported only with FPCVT");
8766
8767 if (Src.getValueType() == MVT::i64) {
8768 SDValue SINT = Src;
8769 // When converting to single-precision, we actually need to convert
8770 // to double-precision first and then round to single-precision.
8771 // To avoid double-rounding effects during that operation, we have
8772 // to prepare the input operand. Bits that might be truncated when
8773 // converting to double-precision are replaced by a bit that won't
8774 // be lost at this stage, but is below the single-precision rounding
8775 // position.
8776 //
8777 // However, if afn is in effect, accept double
8778 // rounding to avoid the extra overhead.
8779 // FIXME: Currently INT_TO_FP can't support fast math flags because
8780 // of the nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
8781 // false.
8782 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
8783 !Op->getFlags().hasApproximateFuncs()) {
8784
8785 // Twiddle input to make sure the low 11 bits are zero. (If this
8786 // is the case, we are guaranteed the value will fit into the 53 bit
8787 // mantissa of an IEEE double-precision value without rounding.)
8788 // If any of those low 11 bits were not zero originally, make sure
8789 // bit 12 (value 2048) is set instead, so that the final rounding
8790 // to single-precision gets the correct result.
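 // A rough check of the masking below: if the low 11 bits of SINT are
 // 0x001, then 0x001 + 2047 = 0x800 sets bit 11, and the final AND with
 // -2048 keeps that sticky bit while clearing bits 0-10; if the low 11 bits
 // are all zero, 0 + 2047 = 0x7ff leaves bit 11 alone and the value is
 // unchanged.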
8791 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8792 SINT, DAG.getConstant(2047, dl, MVT::i64));
8793 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8794 Round, DAG.getConstant(2047, dl, MVT::i64));
8795 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8796 Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
8797 DAG.getSignedConstant(-2048, dl, MVT::i64));
8798
8799 // However, we cannot use that value unconditionally: if the magnitude
8800 // of the input value is small, the bit-twiddling we did above might
8801 // end up visibly changing the output. Fortunately, in that case, we
8802 // don't need to twiddle bits since the original input will convert
8803 // exactly to double-precision floating-point already. Therefore,
8804 // construct a conditional to use the original value if the top 11
8805 // bits are all sign-bit copies, and use the rounded value computed
8806 // above otherwise.
8807 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8808 SINT, DAG.getConstant(53, dl, MVT::i32));
8809 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8810 Cond, DAG.getConstant(1, dl, MVT::i64));
8811 Cond = DAG.getSetCC(
8812 dl,
8813 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8814 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8815
8816 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8817 }
8818
8819 ReuseLoadInfo RLI;
8820 SDValue Bits;
8821
8822 MachineFunction &MF = DAG.getMachineFunction();
8823 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8824 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8825 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8826 if (RLI.ResChain)
8827 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8828 } else if (Subtarget.hasLFIWAX() &&
8829 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8830 MachineMemOperand *MMO =
8831 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8832 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8833 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8834 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8835 DAG.getVTList(MVT::f64, MVT::Other),
8836 Ops, MVT::i32, MMO);
8837 if (RLI.ResChain)
8838 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8839 } else if (Subtarget.hasFPCVT() &&
8840 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8841 MachineMemOperand *MMO =
8842 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8843 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8844 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8845 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8846 DAG.getVTList(MVT::f64, MVT::Other),
8847 Ops, MVT::i32, MMO);
8848 if (RLI.ResChain)
8849 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8850 } else if (((Subtarget.hasLFIWAX() &&
8851 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8852 (Subtarget.hasFPCVT() &&
8853 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8854 SINT.getOperand(0).getValueType() == MVT::i32) {
8855 MachineFrameInfo &MFI = MF.getFrameInfo();
8856 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8857
8858 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8859 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8860
8861 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8862 MachinePointerInfo::getFixedStack(
8863 DAG.getMachineFunction(), FrameIdx));
8864 Chain = Store;
8865
8866 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8867 "Expected an i32 store");
8868
8869 RLI.Ptr = FIdx;
8870 RLI.Chain = Chain;
8871 RLI.MPI =
8872 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8873 RLI.Alignment = Align(4);
8874
8875 MachineMemOperand *MMO =
8876 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8877 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8878 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8879 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8880 PPCISD::LFIWZX : PPCISD::LFIWAX,
8881 dl, DAG.getVTList(MVT::f64, MVT::Other),
8882 Ops, MVT::i32, MMO);
8883 Chain = Bits.getValue(1);
8884 } else
8885 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8886
8887 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8888 if (IsStrict)
8889 Chain = FP.getValue(1);
8890
8891 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8892 if (IsStrict)
8893 FP = DAG.getNode(
8894 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8895 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
8896 Flags);
8897 else
8898 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8899 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8900 }
8901 return FP;
8902 }
8903
8904 assert(Src.getValueType() == MVT::i32 &&
8905 "Unhandled INT_TO_FP type in custom expander!");
8906 // Since we only generate this in 64-bit mode, we can take advantage of
8907 // 64-bit registers. In particular, sign extend the input value into the
8908 // 64-bit register with extsw, store the WHOLE 64-bit value to the stack,
8909 // then lfd it and fcfid it.
8910 MachineFunction &MF = DAG.getMachineFunction();
8911 MachineFrameInfo &MFI = MF.getFrameInfo();
8912 EVT PtrVT = getPointerTy(MF.getDataLayout());
8913
8914 SDValue Ld;
8915 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8916 ReuseLoadInfo RLI;
8917 bool ReusingLoad;
8918 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8919 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8920 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8921
8922 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8923 MachinePointerInfo::getFixedStack(
8924 DAG.getMachineFunction(), FrameIdx));
8925 Chain = Store;
8926
8927 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8928 "Expected an i32 store");
8929
8930 RLI.Ptr = FIdx;
8931 RLI.Chain = Chain;
8932 RLI.MPI =
8933 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8934 RLI.Alignment = Align(4);
8935 }
8936
8937 MachineMemOperand *MMO =
8938 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8939 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8940 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8941 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8942 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8943 MVT::i32, MMO);
8944 Chain = Ld.getValue(1);
8945 if (ReusingLoad && RLI.ResChain) {
8946 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
8947 }
8948 } else {
8949 assert(Subtarget.isPPC64() &&
8950 "i32->FP without LFIWAX supported only on PPC64");
8951
8952 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8953 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8954
8955 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8956
8957 // STD the extended value into the stack slot.
8958 SDValue Store = DAG.getStore(
8959 Chain, dl, Ext64, FIdx,
8960 MachinePointerInfo::getFixedStack(MF, FrameIdx));
8961 Chain = Store;
8962
8963 // Load the value as a double.
8964 Ld = DAG.getLoad(
8965 MVT::f64, dl, Chain, FIdx,
8966 MachinePointerInfo::getFixedStack(MF, FrameIdx));
8967 Chain = Ld.getValue(1);
8968 }
8969
8970 // FCFID it and return it.
8971 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8972 if (IsStrict)
8973 Chain = FP.getValue(1);
8974 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8975 if (IsStrict)
8976 FP = DAG.getNode(
8977 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8978 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
8979 else
8980 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8981 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8982 }
8983 return FP;
8984}
8985
8986SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
8987 SelectionDAG &DAG) const {
8988 SDLoc Dl(Op);
8989 MachineFunction &MF = DAG.getMachineFunction();
8990 EVT PtrVT = getPointerTy(MF.getDataLayout());
8991 SDValue Chain = Op.getOperand(0);
8992
8993 // If the requested mode is a constant, just use the simpler mtfsb/mffscrni.
8994 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
8995 uint64_t Mode = CVal->getZExtValue();
8996 assert(Mode < 4 && "Unsupported rounding mode!");
8997 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
8998 if (Subtarget.isISA3_0())
8999 return SDValue(
9000 DAG.getMachineNode(
9001 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9002 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9003 1);
9004 SDNode *SetHi = DAG.getMachineNode(
9005 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9006 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9007 SDNode *SetLo = DAG.getMachineNode(
9008 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9009 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9010 return SDValue(SetLo, 0);
9011 }
9012
9013 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
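 // Spelled out informally: LLVM's 0/1/2/3 (toward-zero/nearest/+inf/-inf)
 // maps to FPSCR RN 1/0/2/3, and indeed 0^1=1, 1^1=0, 2^0=2 and 3^0=3 under
 // x ^ (~(x >> 1) & 1).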
9014 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9015 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9016 DAG.getConstant(3, Dl, MVT::i32));
9017 SDValue DstFlag = DAG.getNode(
9018 ISD::XOR, Dl, MVT::i32, SrcFlag,
9019 DAG.getNode(ISD::AND, Dl, MVT::i32,
9020 DAG.getNOT(Dl,
9021 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9022 MVT::i32),
9023 One));
9024 // For Power9, there's a faster mffscrn, and we don't need to read the FPSCR.
9025 SDValue MFFS;
9026 if (!Subtarget.isISA3_0()) {
9027 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9028 Chain = MFFS.getValue(1);
9029 }
9030 SDValue NewFPSCR;
9031 if (Subtarget.isPPC64()) {
9032 if (Subtarget.isISA3_0()) {
9033 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9034 } else {
9035 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9036 SDNode *InsertRN = DAG.getMachineNode(
9037 PPC::RLDIMI, Dl, MVT::i64,
9038 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9039 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9040 DAG.getTargetConstant(0, Dl, MVT::i32),
9041 DAG.getTargetConstant(62, Dl, MVT::i32)});
9042 NewFPSCR = SDValue(InsertRN, 0);
9043 }
9044 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9045 } else {
9046 // In 32-bit mode, store f64, load and update the lower half.
9047 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9048 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9049 SDValue Addr = Subtarget.isLittleEndian()
9050 ? StackSlot
9051 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9052 DAG.getConstant(4, Dl, PtrVT));
9053 if (Subtarget.isISA3_0()) {
9054 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9055 } else {
9056 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9057 SDValue Tmp =
9058 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9059 Chain = Tmp.getValue(1);
9060 Tmp = SDValue(DAG.getMachineNode(
9061 PPC::RLWIMI, Dl, MVT::i32,
9062 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9063 DAG.getTargetConstant(30, Dl, MVT::i32),
9064 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9065 0);
9066 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9067 }
9068 NewFPSCR =
9069 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9070 Chain = NewFPSCR.getValue(1);
9071 }
9072 if (Subtarget.isISA3_0())
9073 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9074 {NewFPSCR, Chain}),
9075 1);
9076 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9077 SDNode *MTFSF = DAG.getMachineNode(
9078 PPC::MTFSF, Dl, MVT::Other,
9079 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9080 return SDValue(MTFSF, 0);
9081}
9082
9083SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9084 SelectionDAG &DAG) const {
9085 SDLoc dl(Op);
9086 /*
9087 The rounding mode is in bits 30:31 of the FPSCR, and has the following
9088 settings:
9089 00 Round to nearest
9090 01 Round to 0
9091 10 Round to +inf
9092 11 Round to -inf
9093
9094 GET_ROUNDING, on the other hand, expects the following:
9095 -1 Undefined
9096 0 Round to 0
9097 1 Round to nearest
9098 2 Round to +inf
9099 3 Round to -inf
9100
9101 To perform the conversion, we do:
9102 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9103 */
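 /*
 As an informal check of that formula on all four encodings:
   FPSCR 00 -> 0 ^ (3 >> 1) = 1 (nearest)
   FPSCR 01 -> 1 ^ (2 >> 1) = 0 (round to 0)
   FPSCR 10 -> 2 ^ (1 >> 1) = 2 (+inf)
   FPSCR 11 -> 3 ^ (0 >> 1) = 3 (-inf)
 */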
9104
9105 MachineFunction &MF = DAG.getMachineFunction();
9106 EVT VT = Op.getValueType();
9107 EVT PtrVT = getPointerTy(MF.getDataLayout());
9108
9109 // Save FP Control Word to register
9110 SDValue Chain = Op.getOperand(0);
9111 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9112 Chain = MFFS.getValue(1);
9113
9114 SDValue CWD;
9115 if (isTypeLegal(MVT::i64)) {
9116 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9117 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9118 } else {
9119 // Save FP register to stack slot
9120 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9121 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9122 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9123
9124 // Load FP Control Word from low 32 bits of stack slot.
9126 "Stack slot adjustment is valid only on big endian subtargets!");
9127 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9128 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9129 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9130 Chain = CWD.getValue(1);
9131 }
9132
9133 // Transform as necessary
9134 SDValue CWD1 =
9135 DAG.getNode(ISD::AND, dl, MVT::i32,
9136 CWD, DAG.getConstant(3, dl, MVT::i32));
9137 SDValue CWD2 =
9138 DAG.getNode(ISD::SRL, dl, MVT::i32,
9139 DAG.getNode(ISD::AND, dl, MVT::i32,
9140 DAG.getNode(ISD::XOR, dl, MVT::i32,
9141 CWD, DAG.getConstant(3, dl, MVT::i32)),
9142 DAG.getConstant(3, dl, MVT::i32)),
9143 DAG.getConstant(1, dl, MVT::i32));
9144
9145 SDValue RetVal =
9146 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9147
9148 RetVal =
9149 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9150 dl, VT, RetVal);
9151
9152 return DAG.getMergeValues({RetVal, Chain}, dl);
9153}
9154
9155SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9156 EVT VT = Op.getValueType();
9157 uint64_t BitWidth = VT.getSizeInBits();
9158 SDLoc dl(Op);
9159 assert(Op.getNumOperands() == 3 &&
9160 VT == Op.getOperand(1).getValueType() &&
9161 "Unexpected SHL!");
9162
9163 // Expand into a bunch of logical ops. Note that these ops
9164 // depend on the PPC behavior for oversized shift amounts.
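 // Informal check with 32-bit parts and Amt = 40: Hi << 40 and Lo >> (32-40)
 // both become zero under PPC's oversized-shift behavior, while Lo << (40-32)
 // supplies the surviving bits, so OutHi = Lo << 8 and OutLo = 0, matching a
 // true 64-bit shift left by 40.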
9165 SDValue Lo = Op.getOperand(0);
9166 SDValue Hi = Op.getOperand(1);
9167 SDValue Amt = Op.getOperand(2);
9168 EVT AmtVT = Amt.getValueType();
9169
9170 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9171 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9172 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9173 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9174 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9175 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9176 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9177 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9178 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9179 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9180 SDValue OutOps[] = { OutLo, OutHi };
9181 return DAG.getMergeValues(OutOps, dl);
9182}
9183
9184SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9185 EVT VT = Op.getValueType();
9186 SDLoc dl(Op);
9187 uint64_t BitWidth = VT.getSizeInBits();
9188 assert(Op.getNumOperands() == 3 &&
9189 VT == Op.getOperand(1).getValueType() &&
9190 "Unexpected SRL!");
9191
9192 // Expand into a bunch of logical ops. Note that these ops
9193 // depend on the PPC behavior for oversized shift amounts.
9194 SDValue Lo = Op.getOperand(0);
9195 SDValue Hi = Op.getOperand(1);
9196 SDValue Amt = Op.getOperand(2);
9197 EVT AmtVT = Amt.getValueType();
9198
9199 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9200 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9201 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9202 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9203 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9204 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9205 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9206 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9207 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9208 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9209 SDValue OutOps[] = { OutLo, OutHi };
9210 return DAG.getMergeValues(OutOps, dl);
9211}
9212
9213SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9214 SDLoc dl(Op);
9215 EVT VT = Op.getValueType();
9216 uint64_t BitWidth = VT.getSizeInBits();
9217 assert(Op.getNumOperands() == 3 &&
9218 VT == Op.getOperand(1).getValueType() &&
9219 "Unexpected SRA!");
9220
9221 // Expand into a bunch of logical ops, followed by a select_cc.
9222 SDValue Lo = Op.getOperand(0);
9223 SDValue Hi = Op.getOperand(1);
9224 SDValue Amt = Op.getOperand(2);
9225 EVT AmtVT = Amt.getValueType();
9226
9227 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9228 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9229 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9230 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9231 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9232 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9233 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9234 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9235 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9236 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9237 Tmp4, Tmp6, ISD::SETLE);
9238 SDValue OutOps[] = { OutLo, OutHi };
9239 return DAG.getMergeValues(OutOps, dl);
9240}
9241
9242SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9243 SelectionDAG &DAG) const {
9244 SDLoc dl(Op);
9245 EVT VT = Op.getValueType();
9246 unsigned BitWidth = VT.getSizeInBits();
9247
9248 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9249 SDValue X = Op.getOperand(0);
9250 SDValue Y = Op.getOperand(1);
9251 SDValue Z = Op.getOperand(2);
9252 EVT AmtVT = Z.getValueType();
9253
9254 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9255 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9256 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9257 // on PowerPC shift by BW being well defined.
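 // Sanity check, informally: when Z % BW == 0, SubZ == BW and PPC's
 // shift-by-BW yields zero, so fshl degenerates to (X << 0) | 0 == X and
 // fshr to 0 | (Y >> 0) == Y, which matches the ISD::FSHL/FSHR definitions.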
9258 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9259 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9260 SDValue SubZ =
9261 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9262 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9263 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9264 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9265}
9266
9267//===----------------------------------------------------------------------===//
9268// Vector related lowering.
9269//
9270
9271/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9272/// element size of SplatSize. Cast the result to VT.
9273static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9274 SelectionDAG &DAG, const SDLoc &dl) {
9275 static const MVT VTys[] = { // canonical VT to use for each size.
9276 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9277 };
9278
9279 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9280
9281 // For a splat with all ones, turn it into vspltisb 0xFF to canonicalize it.
9282 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9283 SplatSize = 1;
9284 Val = 0xFF;
9285 }
9286
9287 EVT CanonicalVT = VTys[SplatSize-1];
9288
9289 // Build a canonical splat for this value.
9290 // Explicitly truncate APInt here, as this API is used with a mix of
9291 // signed and unsigned values.
9292 return DAG.getBitcast(
9293 ReqVT,
9294 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9295}
9296
9297/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9298/// specified intrinsic ID.
9299static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9300 const SDLoc &dl, EVT DestVT = MVT::Other) {
9301 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9302 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9303 DAG.getConstant(IID, dl, MVT::i32), Op);
9304}
9305
9306/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9307/// specified intrinsic ID.
9309 SelectionDAG &DAG, const SDLoc &dl,
9310 EVT DestVT = MVT::Other) {
9311 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9312 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9313 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9314}
9315
9316/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9317/// specified intrinsic ID.
9318static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9319 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9320 EVT DestVT = MVT::Other) {
9321 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9322 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9323 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9324}
9325
9326/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9327/// amount. The result has the specified value type.
9328static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9329 SelectionDAG &DAG, const SDLoc &dl) {
9330 // Force LHS/RHS to be the right type.
9331 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9332 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9333
9334 int Ops[16];
9335 for (unsigned i = 0; i != 16; ++i)
9336 Ops[i] = i + Amt;
9337 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9338 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9339}
9340
9341/// Do we have an efficient pattern in a .td file for this node?
9342///
9343/// \param V - pointer to the BuildVectorSDNode being matched
9344/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9345///
9346/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9347/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9348/// the opposite is true (expansion is beneficial) are:
9349/// - The node builds a vector out of integers that are not 32 or 64-bits
9350/// - The node builds a vector out of constants
9351/// - The node is a "load-and-splat"
9352/// In all other cases, we will choose to keep the BUILD_VECTOR.
9353static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9354 bool HasDirectMove,
9355 bool HasP8Vector) {
9356 EVT VecVT = V->getValueType(0);
9357 bool RightType = VecVT == MVT::v2f64 ||
9358 (HasP8Vector && VecVT == MVT::v4f32) ||
9359 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9360 if (!RightType)
9361 return false;
9362
9363 bool IsSplat = true;
9364 bool IsLoad = false;
9365 SDValue Op0 = V->getOperand(0);
9366
9367 // This function is called in a block that confirms the node is not a constant
9368 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9369 // different constants.
9370 if (V->isConstant())
9371 return false;
9372 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9373 if (V->getOperand(i).isUndef())
9374 return false;
9375 // We want to expand nodes that represent load-and-splat even if the
9376 // loaded value is a floating point truncation or conversion to int.
9377 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9378 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9379 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9380 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9381 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9382 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9383 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9384 IsLoad = true;
9385 // If the operands are different or the input is not a load and has more
9386 // uses than just this BV node, then it isn't a splat.
9387 if (V->getOperand(i) != Op0 ||
9388 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9389 IsSplat = false;
9390 }
9391 return !(IsSplat && IsLoad);
9392}
9393
9394// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9395SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9396
9397 SDLoc dl(Op);
9398 SDValue Op0 = Op->getOperand(0);
9399
9400 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9401 (Op.getValueType() != MVT::f128))
9402 return SDValue();
9403
9404 SDValue Lo = Op0.getOperand(0);
9405 SDValue Hi = Op0.getOperand(1);
9406 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9407 return SDValue();
9408
9409 if (!Subtarget.isLittleEndian())
9410 std::swap(Lo, Hi);
9411
9412 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9413}
9414
9415static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9416 const SDValue *InputLoad = &Op;
9417 while (InputLoad->getOpcode() == ISD::BITCAST)
9418 InputLoad = &InputLoad->getOperand(0);
9419 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9420 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9421 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9422 InputLoad = &InputLoad->getOperand(0);
9423 }
9424 if (InputLoad->getOpcode() != ISD::LOAD)
9425 return nullptr;
9426 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9427 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9428}
9429
9430// Convert the argument APFloat to a single precision APFloat if there is no
9431// loss in information during the conversion to single precision APFloat and the
9432// resulting number is not a denormal number. Return true if successful.
9433bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9434 APFloat APFloatToConvert = ArgAPFloat;
9435 bool LosesInfo = true;
9437 &LosesInfo);
9438 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9439 if (Success)
9440 ArgAPFloat = APFloatToConvert;
9441 return Success;
9442}
9443
9444// Bitcast the argument APInt to a double and convert it to a single precision
9445// APFloat, bitcast the APFloat to an APInt and assign it to the original
9446// argument if there is no loss in information during the conversion from
9447// double to single precision APFloat and the resulting number is not a denormal
9448// number. Return true if successful.
9449bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9450 double DpValue = ArgAPInt.bitsToDouble();
9451 APFloat APFloatDp(DpValue);
9452 bool Success = convertToNonDenormSingle(APFloatDp);
9453 if (Success)
9454 ArgAPInt = APFloatDp.bitcastToAPInt();
9455 return Success;
9456}
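// For example (informally), the double 2.0 (0x4000000000000000) converts
// losslessly to the single-precision pattern 0x40000000, so the callers below
// can use XXSPLTIDP for it; a value that would lose bits or become a
// single-precision denormal is rejected and handled via XXSPLTI32DX instead.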
9457
9458// Nondestructive check for convertToNonDenormSingle.
9459bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9460 // Only convert if it loses info, since XXSPLTIDP should
9461 // handle the other case.
9462 APFloat APFloatToConvert = ArgAPFloat;
9463 bool LosesInfo = true;
9465 &LosesInfo);
9466
9467 return (!LosesInfo && !APFloatToConvert.isDenormal());
9468}
9469
9470static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9471 unsigned &Opcode) {
9472 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9473 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9474 return false;
9475
9476 EVT Ty = Op->getValueType(0);
9477 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9478 // as we cannot handle extending loads for these types.
9479 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9480 ISD::isNON_EXTLoad(InputNode))
9481 return true;
9482
9483 EVT MemVT = InputNode->getMemoryVT();
9484 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9485 // memory VT is the same vector element VT type.
9486 // The loads feeding into the v8i16 and v16i8 types will be extending because
9487 // scalar i8/i16 are not legal types.
9488 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9489 (MemVT == Ty.getVectorElementType()))
9490 return true;
9491
9492 if (Ty == MVT::v2i64) {
9493 // Check the extend type, when the input type is i32, and the output vector
9494 // type is v2i64.
9495 if (MemVT == MVT::i32) {
9496 if (ISD::isZEXTLoad(InputNode))
9497 Opcode = PPCISD::ZEXT_LD_SPLAT;
9498 if (ISD::isSEXTLoad(InputNode))
9499 Opcode = PPCISD::SEXT_LD_SPLAT;
9500 }
9501 return true;
9502 }
9503 return false;
9504}
9505
9506static bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
9507 bool IsLittleEndian) {
9508 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9509
9510 BitMask.clearAllBits();
9511 EVT VT = BVN.getValueType(0);
9512 unsigned VTSize = VT.getSizeInBits();
9513 APInt ConstValue(VTSize, 0);
9514
9515 unsigned EltWidth = VT.getScalarSizeInBits();
9516
9517 unsigned BitPos = 0;
9518 for (auto OpVal : BVN.op_values()) {
9519 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9520
9521 if (!CN)
9522 return false;
9523 // The elements in a vector register are ordered in reverse byte order
9524 // between little-endian and big-endian modes.
9525 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9526 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9527 BitPos += EltWidth;
9528 }
9529
9530 for (unsigned J = 0; J < 16; ++J) {
9531 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9532 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9533 return false;
9534 if (ExtractValue == 0xFF)
9535 BitMask.setBit(J);
9536 }
9537 return true;
9538}
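// Rough example: a build_vector whose 16 bytes are, say,
// { 0xFF, 0x00, 0x00, 0xFF, 0x00, ... } produces a BitMask with exactly those
// byte positions set, which is what the MTVSRBMI path below consumes; any
// byte that is neither 0x00 nor 0xFF makes this helper return false.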
9539
9540// If this is a case we can't handle, return null and let the default
9541// expansion code take care of it. If we CAN select this case, and if it
9542// selects to a single instruction, return Op. Otherwise, if we can codegen
9543// this case more efficiently than a constant pool load, lower it to the
9544// sequence of ops that should be used.
9545SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9546 SelectionDAG &DAG) const {
9547 SDLoc dl(Op);
9548 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9549 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9550
9551 if (Subtarget.hasP10Vector()) {
9552 APInt BitMask(32, 0);
9553 // If the value of the vector is all zeros or all ones,
9554 // we do not convert it to MTVSRBMI.
9555 // The xxleqv instruction sets a vector with all ones.
9556 // The xxlxor instruction sets a vector with all zeros.
9557 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9558 BitMask != 0 && BitMask != 0xffff) {
9559 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9560 MachineSDNode *MSDNode =
9561 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9562 SDValue SDV = SDValue(MSDNode, 0);
9563 EVT DVT = BVN->getValueType(0);
9564 EVT SVT = SDV.getValueType();
9565 if (SVT != DVT) {
9566 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9567 }
9568 return SDV;
9569 }
9570 // Recognize build vector patterns to emit VSX vector instructions
9571 // instead of loading value from memory.
9572 if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9573 return VecPat;
9574 }
9575 // Check if this is a splat of a constant value.
9576 APInt APSplatBits, APSplatUndef;
9577 unsigned SplatBitSize;
9578 bool HasAnyUndefs;
9579 bool BVNIsConstantSplat =
9580 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9581 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9582
9583 // If it is a splat of a double, check if we can shrink it to a 32 bit
9584 // non-denormal float which when converted back to double gives us the same
9585 // double. This is to exploit the XXSPLTIDP instruction.
9586 // If we lose precision, we use XXSPLTI32DX.
9587 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9588 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9589 // Check the type first to short-circuit so we don't modify APSplatBits if
9590 // this block isn't executed.
9591 if ((Op->getValueType(0) == MVT::v2f64) &&
9592 convertToNonDenormSingle(APSplatBits)) {
9593 SDValue SplatNode = DAG.getNode(
9594 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9595 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9596 return DAG.getBitcast(Op.getValueType(), SplatNode);
9597 } else {
9598 // We may lose precision, so we have to use XXSPLTI32DX.
9599
9600 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9601 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9602 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9603
9604 if (!Hi || !Lo)
 9605        // If either half is 0, generate XXLXOR to start from a zero vector.
9606 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9607
9608 if (Hi)
9609 SplatNode = DAG.getNode(
9610 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9611 DAG.getTargetConstant(0, dl, MVT::i32),
9612 DAG.getTargetConstant(Hi, dl, MVT::i32));
9613
9614 if (Lo)
9615 SplatNode =
9616 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9617 DAG.getTargetConstant(1, dl, MVT::i32),
9618 DAG.getTargetConstant(Lo, dl, MVT::i32));
9619
9620 return DAG.getBitcast(Op.getValueType(), SplatNode);
9621 }
9622 }
9623
9624 bool IsSplat64 = false;
9625 uint64_t SplatBits = 0;
9626 int32_t SextVal = 0;
9627 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9628 SplatBits = APSplatBits.getZExtValue();
9629 if (SplatBitSize <= 32) {
9630 SextVal = SignExtend32(SplatBits, SplatBitSize);
9631 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9632 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9633 bool P9Vector = Subtarget.hasP9Vector();
9634 int32_t Hi = P9Vector ? 127 : 15;
9635 int32_t Lo = P9Vector ? -128 : -16;
9636 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9637 SextVal = static_cast<int32_t>(SplatBits);
9638 }
9639 }
9640
9641 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9642 unsigned NewOpcode = PPCISD::LD_SPLAT;
9643
9644 // Handle load-and-splat patterns as we have instructions that will do this
9645 // in one go.
9646 if (DAG.isSplatValue(Op, true) &&
9647 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9648 const SDValue *InputLoad = &Op.getOperand(0);
9649 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9650
9651 // If the input load is an extending load, it will be an i32 -> i64
9652 // extending load and isValidSplatLoad() will update NewOpcode.
9653 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9654 unsigned ElementSize =
9655 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9656
9657 assert(((ElementSize == 2 * MemorySize)
9658 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9659 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9660 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9661 "Unmatched element size and opcode!\n");
9662
 9663      // To check that this load has a single user, we actually have to check
 9664      // for vector width (128 bits) / ElementSize uses, since each operand of
 9665      // the BUILD_VECTOR is a separate use of the value.
9666 unsigned NumUsesOfInputLD = 128 / ElementSize;
9667 for (SDValue BVInOp : Op->ops())
9668 if (BVInOp.isUndef())
9669 NumUsesOfInputLD--;
9670
 9671      // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
 9672      // The cases below should also apply to "lfiwzx/lfiwax + LE target + index
 9673      // 1", "lxvrhx + BE target + index 7", and "lxvrbx + BE target + index
 9674      // 15", but isValidSplatLoad() currently only returns true when the
 9675      // data at index 0 is not nullptr, so we will not get into trouble for
 9676      // these cases.
9677 //
9678 // case 1 - lfiwzx/lfiwax
9679 // 1.1: load result is i32 and is sign/zero extend to i64;
9680 // 1.2: build a v2i64 vector type with above loaded value;
9681 // 1.3: the vector has only one value at index 0, others are all undef;
9682 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9683 if (NumUsesOfInputLD == 1 &&
9684 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9685 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9686 Subtarget.hasLFIWAX()))
9687 return SDValue();
9688
9689 // case 2 - lxvr[hb]x
9690 // 2.1: load result is at most i16;
9691 // 2.2: build a vector with above loaded value;
9692 // 2.3: the vector has only one value at index 0, others are all undef;
9693 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9694 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9695 Subtarget.isISA3_1() && ElementSize <= 16)
9696 return SDValue();
9697
9698 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9699 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9700 Subtarget.hasVSX()) {
9701 SDValue Ops[] = {
9702 LD->getChain(), // Chain
9703 LD->getBasePtr(), // Ptr
9704 DAG.getValueType(Op.getValueType()) // VT
9705 };
9706 SDValue LdSplt = DAG.getMemIntrinsicNode(
9707 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9708 LD->getMemoryVT(), LD->getMemOperand());
9709 // Replace all uses of the output chain of the original load with the
9710 // output chain of the new load.
9711 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9712 LdSplt.getValue(1));
9713 return LdSplt;
9714 }
9715 }
9716
9717 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9718 // 32-bits can be lowered to VSX instructions under certain conditions.
9719 // Without VSX, there is no pattern more efficient than expanding the node.
9720 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9721 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9722 Subtarget.hasP8Vector()))
9723 return Op;
9724 return SDValue();
9725 }
9726
9727 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9728 unsigned SplatSize = SplatBitSize / 8;
9729
9730 // First, handle single instruction cases.
9731
9732 // All zeros?
9733 if (SplatBits == 0) {
9734 // Canonicalize all zero vectors to be v4i32.
9735 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9736 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9737 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9738 }
9739 return Op;
9740 }
9741
9742 // We have XXSPLTIW for constant splats four bytes wide.
 9743    // Since the vector length is a multiple of 4 bytes, 2-byte splats can be replaced
9744 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9745 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9746 // turned into a 4-byte splat of 0xABABABAB.
9747 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9748 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9749 Op.getValueType(), DAG, dl);
9750
9751 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9752 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9753 dl);
9754
9755 // We have XXSPLTIB for constant splats one byte wide.
9756 if (Subtarget.hasP9Vector() && SplatSize == 1)
9757 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9758 dl);
9759
9760 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9761 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
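  // For example, a v2i64 splat of -5 is built as a vspltisw of -5 followed by
  // vupklsw to sign-extend the words to doublewords (handled in the
  // SplatSize == 8 path below).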
9762 if (SextVal >= -16 && SextVal <= 15) {
9763 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9764 // generate a splat word with extend for size 8.
9765 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9766 SDValue Res =
9767 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9768 if (SplatSize != 8)
9769 return Res;
9770 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
9771 }
9772
9773 // Two instruction sequences.
9774
9775 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9776 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
 9777    SmallVector<SDValue, 16> Ops(16, C);
 9778    SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9779 unsigned IID;
9780 EVT VT;
9781 switch (SplatSize) {
9782 default:
9783 llvm_unreachable("Unexpected type for vector constant.");
9784 case 2:
9785 IID = Intrinsic::ppc_altivec_vupklsb;
9786 VT = MVT::v8i16;
9787 break;
9788 case 4:
9789 IID = Intrinsic::ppc_altivec_vextsb2w;
9790 VT = MVT::v4i32;
9791 break;
9792 case 8:
9793 IID = Intrinsic::ppc_altivec_vextsb2d;
9794 VT = MVT::v2i64;
9795 break;
9796 }
9797 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9798 return DAG.getBitcast(Op->getValueType(0), Extend);
9799 }
9800 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9801
9802 // If this value is in the range [-32,30] and is even, use:
9803 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9804 // If this value is in the range [17,31] and is odd, use:
9805 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9806 // If this value is in the range [-31,-17] and is odd, use:
9807 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9808 // Note the last two are three-instruction sequences.
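  // For example, 30 is emitted as vsplti(15) + vsplti(15), 27 as
  // vsplti(11) - vsplti(-16) (11 - (-16) == 27), and -27 as
  // vsplti(-11) + vsplti(-16).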
9809 if (SextVal >= -32 && SextVal <= 31) {
9810 // To avoid having these optimizations undone by constant folding,
9811 // we convert to a pseudo that will be expanded later into one of
9812 // the above forms.
9813 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9814 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9815 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9816 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9817 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9818 if (VT == Op.getValueType())
9819 return RetVal;
9820 else
9821 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9822 }
9823
9824 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9825 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9826 // for fneg/fabs.
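  // For example, vspltisw -1 gives 0xFFFF_FFFF in each word; vslw by the low
  // 5 bits of -1 (i.e. 31) yields 0x8000_0000, and xor-ing with the all-ones
  // vector then gives 0x7FFF_FFFF.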
9827 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9828 // Make -1 and vspltisw -1:
9829 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9830
9831 // Make the VSLW intrinsic, computing 0x8000_0000.
9832 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9833 OnesV, DAG, dl);
9834
9835 // xor by OnesV to invert it.
9836 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9837 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9838 }
9839
9840 // Check to see if this is a wide variety of vsplti*, binop self cases.
9841 static const signed char SplatCsts[] = {
9842 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9843 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9844 };
9845
9846 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9847 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
 9849    // cases which are ambiguous (e.g. formation of 0x8000_0000).
9849 int i = SplatCsts[idx];
9850
9851 // Figure out what shift amount will be used by altivec if shifted by i in
9852 // this splat size.
9853 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9854
9855 // vsplti + shl self.
9856 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9857 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9858 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9859 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9860 Intrinsic::ppc_altivec_vslw
9861 };
9862 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9863 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9864 }
9865
9866 // vsplti + srl self.
9867 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9868 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9869 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9870 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9871 Intrinsic::ppc_altivec_vsrw
9872 };
9873 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9874 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9875 }
9876
9877 // vsplti + rol self.
9878 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9879 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9880 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9881 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9882 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9883 Intrinsic::ppc_altivec_vrlw
9884 };
9885 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9886 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9887 }
9888
9889 // t = vsplti c, result = vsldoi t, t, 1
9890 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9891 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9892 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9893 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9894 }
9895 // t = vsplti c, result = vsldoi t, t, 2
9896 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9897 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9898 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9899 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9900 }
9901 // t = vsplti c, result = vsldoi t, t, 3
9902 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9903 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9904 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9905 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9906 }
9907 }
9908
9909 return SDValue();
9910}
9911
9912/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9913/// the specified operations to build the shuffle.
9914static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9915                                      SDValue RHS, SelectionDAG &DAG,
9916 const SDLoc &dl) {
9917 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9918 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9919 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
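  // A PFEntry packs the estimated cost into bits 31-30 (consumed by the
  // caller), the operation into bits 29-26, and two 13-bit operand IDs.
  // Each ID encodes a 4-element mask as base-9 digits, where 0-7 select an
  // element of the concatenated inputs and 8 means undef; e.g. <0,1,2,3>
  // encodes to (1*9+2)*9+3 and <4,5,6,7> to ((4*9+5)*9+6)*9+7, matching the
  // OP_COPY checks below.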
9920
9921 enum {
9922 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9923 OP_VMRGHW,
9924 OP_VMRGLW,
9925 OP_VSPLTISW0,
9926 OP_VSPLTISW1,
9927 OP_VSPLTISW2,
9928 OP_VSPLTISW3,
9929 OP_VSLDOI4,
9930 OP_VSLDOI8,
9931 OP_VSLDOI12
9932 };
9933
9934 if (OpNum == OP_COPY) {
9935 if (LHSID == (1*9+2)*9+3) return LHS;
9936 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9937 return RHS;
9938 }
9939
9940 SDValue OpLHS, OpRHS;
9941 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9942 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9943
9944 int ShufIdxs[16];
9945 switch (OpNum) {
9946 default: llvm_unreachable("Unknown i32 permute!");
9947 case OP_VMRGHW:
9948 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9949 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9950 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9951 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9952 break;
9953 case OP_VMRGLW:
9954 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9955 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9956 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9957 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9958 break;
9959 case OP_VSPLTISW0:
9960 for (unsigned i = 0; i != 16; ++i)
9961 ShufIdxs[i] = (i&3)+0;
9962 break;
9963 case OP_VSPLTISW1:
9964 for (unsigned i = 0; i != 16; ++i)
9965 ShufIdxs[i] = (i&3)+4;
9966 break;
9967 case OP_VSPLTISW2:
9968 for (unsigned i = 0; i != 16; ++i)
9969 ShufIdxs[i] = (i&3)+8;
9970 break;
9971 case OP_VSPLTISW3:
9972 for (unsigned i = 0; i != 16; ++i)
9973 ShufIdxs[i] = (i&3)+12;
9974 break;
9975 case OP_VSLDOI4:
9976 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9977 case OP_VSLDOI8:
9978 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9979 case OP_VSLDOI12:
9980 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9981 }
9982 EVT VT = OpLHS.getValueType();
9983 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9984 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9985 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9986 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9987}
9988
9989/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9990/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9991/// SDValue.
9992SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9993 SelectionDAG &DAG) const {
9994 const unsigned BytesInVector = 16;
9995 bool IsLE = Subtarget.isLittleEndian();
9996 SDLoc dl(N);
9997 SDValue V1 = N->getOperand(0);
9998 SDValue V2 = N->getOperand(1);
9999 unsigned ShiftElts = 0, InsertAtByte = 0;
10000 bool Swap = false;
10001
10002 // Shifts required to get the byte we want at element 7.
10003 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
10004 0, 15, 14, 13, 12, 11, 10, 9};
10005 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10006 1, 2, 3, 4, 5, 6, 7, 8};
10007
10008 ArrayRef<int> Mask = N->getMask();
10009 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10010
10011 // For each mask element, find out if we're just inserting something
10012 // from V2 into V1 or vice versa.
10013 // Possible permutations inserting an element from V2 into V1:
10014 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10015 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10016 // ...
10017 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10018 // Inserting from V1 into V2 will be similar, except mask range will be
10019 // [16,31].
10020
10021 bool FoundCandidate = false;
10022 // If both vector operands for the shuffle are the same vector, the mask
10023 // will contain only elements from the first one and the second one will be
10024 // undef.
10025 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
 10026  // Go through the mask of bytes to find an element that's being moved
10027 // from one vector to the other.
10028 for (unsigned i = 0; i < BytesInVector; ++i) {
10029 unsigned CurrentElement = Mask[i];
 10030    // If the 2nd operand is undefined, we should only look for the VINSERTB
 10031    // source element (7 for big endian, 8 for little endian) in the Mask.
10032 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10033 continue;
10034
10035 bool OtherElementsInOrder = true;
10036 // Examine the other elements in the Mask to see if they're in original
10037 // order.
10038 for (unsigned j = 0; j < BytesInVector; ++j) {
10039 if (j == i)
10040 continue;
 10041      // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
 10042      // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
 10043      // in which case we assume we're always picking from the 1st operand.
10044 int MaskOffset =
10045 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10046 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10047 OtherElementsInOrder = false;
10048 break;
10049 }
10050 }
10051 // If other elements are in original order, we record the number of shifts
10052 // we need to get the element we want into element 7. Also record which byte
10053 // in the vector we should insert into.
10054 if (OtherElementsInOrder) {
10055 // If 2nd operand is undefined, we assume no shifts and no swapping.
10056 if (V2.isUndef()) {
10057 ShiftElts = 0;
10058 Swap = false;
10059 } else {
10060 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
10061 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10062 : BigEndianShifts[CurrentElement & 0xF];
10063 Swap = CurrentElement < BytesInVector;
10064 }
10065 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10066 FoundCandidate = true;
10067 break;
10068 }
10069 }
10070
10071 if (!FoundCandidate)
10072 return SDValue();
10073
10074 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10075 // optionally with VECSHL if shift is required.
10076 if (Swap)
10077 std::swap(V1, V2);
10078 if (V2.isUndef())
10079 V2 = V1;
10080 if (ShiftElts) {
10081 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10082 DAG.getConstant(ShiftElts, dl, MVT::i32));
10083 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10084 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10085 }
10086 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10087 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10088}
10089
10090/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10091/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10092/// SDValue.
10093SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10094 SelectionDAG &DAG) const {
10095 const unsigned NumHalfWords = 8;
10096 const unsigned BytesInVector = NumHalfWords * 2;
10097 // Check that the shuffle is on half-words.
10098 if (!isNByteElemShuffleMask(N, 2, 1))
10099 return SDValue();
10100
10101 bool IsLE = Subtarget.isLittleEndian();
10102 SDLoc dl(N);
10103 SDValue V1 = N->getOperand(0);
10104 SDValue V2 = N->getOperand(1);
10105 unsigned ShiftElts = 0, InsertAtByte = 0;
10106 bool Swap = false;
10107
10108 // Shifts required to get the half-word we want at element 3.
10109 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10110 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10111
10112 uint32_t Mask = 0;
10113 uint32_t OriginalOrderLow = 0x1234567;
10114 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10115 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10116 // 32-bit space, only need 4-bit nibbles per element.
10117 for (unsigned i = 0; i < NumHalfWords; ++i) {
10118 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10119 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10120 }
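  // For example, the identity mask (half-word i taken from element i of V1)
  // packs to 0x01234567, which is exactly OriginalOrderLow.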
10121
10122 // For each mask element, find out if we're just inserting something
10123 // from V2 into V1 or vice versa. Possible permutations inserting an element
10124 // from V2 into V1:
10125 // X, 1, 2, 3, 4, 5, 6, 7
10126 // 0, X, 2, 3, 4, 5, 6, 7
10127 // 0, 1, X, 3, 4, 5, 6, 7
10128 // 0, 1, 2, X, 4, 5, 6, 7
10129 // 0, 1, 2, 3, X, 5, 6, 7
10130 // 0, 1, 2, 3, 4, X, 6, 7
10131 // 0, 1, 2, 3, 4, 5, X, 7
10132 // 0, 1, 2, 3, 4, 5, 6, X
10133 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10134
10135 bool FoundCandidate = false;
10136 // Go through the mask of half-words to find an element that's being moved
10137 // from one vector to the other.
10138 for (unsigned i = 0; i < NumHalfWords; ++i) {
10139 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10140 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10141 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10142 uint32_t TargetOrder = 0x0;
10143
10144 // If both vector operands for the shuffle are the same vector, the mask
10145 // will contain only elements from the first one and the second one will be
10146 // undef.
10147 if (V2.isUndef()) {
10148 ShiftElts = 0;
10149 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10150 TargetOrder = OriginalOrderLow;
10151 Swap = false;
 10152      // Skip if this is not the correct element or the mask of the other
 10153      // elements doesn't match our expected order.
10154 if (MaskOneElt == VINSERTHSrcElem &&
10155 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10156 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10157 FoundCandidate = true;
10158 break;
10159 }
10160 } else { // If both operands are defined.
10161 // Target order is [8,15] if the current mask is between [0,7].
10162 TargetOrder =
10163 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
 10164      // Skip if the mask of the other elements doesn't match our expected order.
10165 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10166 // We only need the last 3 bits for the number of shifts.
10167 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10168 : BigEndianShifts[MaskOneElt & 0x7];
10169 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10170 Swap = MaskOneElt < NumHalfWords;
10171 FoundCandidate = true;
10172 break;
10173 }
10174 }
10175 }
10176
10177 if (!FoundCandidate)
10178 return SDValue();
10179
10180 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10181 // optionally with VECSHL if shift is required.
10182 if (Swap)
10183 std::swap(V1, V2);
10184 if (V2.isUndef())
10185 V2 = V1;
10186 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10187 if (ShiftElts) {
10188 // Double ShiftElts because we're left shifting on v16i8 type.
10189 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10190 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10191 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10192 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10193 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10194 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10195 }
10196 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10197 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10198 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10199 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10200}
10201
10202/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10203/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10204/// return the default SDValue.
10205SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10206 SelectionDAG &DAG) const {
10207 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10208 // to v16i8. Peek through the bitcasts to get the actual operands.
 10209  SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
 10210  SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
 10211
10212 auto ShuffleMask = SVN->getMask();
10213 SDValue VecShuffle(SVN, 0);
10214 SDLoc DL(SVN);
10215
10216 // Check that we have a four byte shuffle.
10217 if (!isNByteElemShuffleMask(SVN, 4, 1))
10218 return SDValue();
10219
10220 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10221 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10222 std::swap(LHS, RHS);
 10223    VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
 10224    ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10225 if (!CommutedSV)
10226 return SDValue();
10227 ShuffleMask = CommutedSV->getMask();
10228 }
10229
10230 // Ensure that the RHS is a vector of constants.
10231 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10232 if (!BVN)
10233 return SDValue();
10234
10235 // Check if RHS is a splat of 4-bytes (or smaller).
10236 APInt APSplatValue, APSplatUndef;
10237 unsigned SplatBitSize;
10238 bool HasAnyUndefs;
10239 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10240 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10241 SplatBitSize > 32)
10242 return SDValue();
10243
10244 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10245 // The instruction splats a constant C into two words of the source vector
10246 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10247 // Thus we check that the shuffle mask is the equivalent of
10248 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10249 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10250 // within each word are consecutive, so we only need to check the first byte.
10251 SDValue Index;
10252 bool IsLE = Subtarget.isLittleEndian();
10253 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10254 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10255 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10256 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10257 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10258 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10259 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10260 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10261 else
10262 return SDValue();
10263
10264 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10265 // for XXSPLTI32DX.
10266 unsigned SplatVal = APSplatValue.getZExtValue();
10267 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10268 SplatVal |= (SplatVal << SplatBitSize);
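  // For example, an 8-bit splat of 0xAB is widened to 0xABAB and then to
  // 0xABABABAB before being used as the XXSPLTI32DX immediate.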
10269
10270 SDValue SplatNode = DAG.getNode(
10271 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10272 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10273 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10274}
10275
10276/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10277/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10278/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10279/// i.e (or (shl x, C1), (srl x, 128-C1)).
10280SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10281 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10282 assert(Op.getValueType() == MVT::v1i128 &&
10283 "Only set v1i128 as custom, other type shouldn't reach here!");
10284 SDLoc dl(Op);
10285 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10286 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10287 unsigned SHLAmt = N1.getConstantOperandVal(0);
10288 if (SHLAmt % 8 == 0) {
10289 std::array<int, 16> Mask;
10290 std::iota(Mask.begin(), Mask.end(), 0);
10291 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
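    // For example, SHLAmt == 24 rotates the identity mask to
    // {3, 4, ..., 15, 0, 1, 2}.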
10292 if (SDValue Shuffle =
10293 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10294 DAG.getUNDEF(MVT::v16i8), Mask))
10295 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10296 }
10297 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10298 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10299 DAG.getConstant(SHLAmt, dl, MVT::i32));
10300 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10301 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10302 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10303 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10304}
10305
10306/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10307/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10308/// return the code it can be lowered into. Worst case, it can always be
10309/// lowered into a vperm.
10310SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10311 SelectionDAG &DAG) const {
10312 SDLoc dl(Op);
10313 SDValue V1 = Op.getOperand(0);
10314 SDValue V2 = Op.getOperand(1);
10315 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10316
10317 // Any nodes that were combined in the target-independent combiner prior
10318 // to vector legalization will not be sent to the target combine. Try to
10319 // combine it here.
10320 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10321 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10322 return NewShuffle;
10323 Op = NewShuffle;
 10324    SVOp = cast<ShuffleVectorSDNode>(Op);
 10325    V1 = Op.getOperand(0);
10326 V2 = Op.getOperand(1);
10327 }
10328 EVT VT = Op.getValueType();
10329 bool isLittleEndian = Subtarget.isLittleEndian();
10330
10331 unsigned ShiftElts, InsertAtByte;
10332 bool Swap = false;
10333
10334 // If this is a load-and-splat, we can do that with a single instruction
10335 // in some cases. However if the load has multiple uses, we don't want to
10336 // combine it because that will just produce multiple loads.
10337 bool IsPermutedLoad = false;
10338 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10339 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10340 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10341 InputLoad->hasOneUse()) {
10342 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10343 int SplatIdx =
10344 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10345
10346 // The splat index for permuted loads will be in the left half of the vector
10347 // which is strictly wider than the loaded value by 8 bytes. So we need to
10348 // adjust the splat index to point to the correct address in memory.
10349 if (IsPermutedLoad) {
10350 assert((isLittleEndian || IsFourByte) &&
10351 "Unexpected size for permuted load on big endian target");
10352 SplatIdx += IsFourByte ? 2 : 1;
10353 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10354 "Splat of a value outside of the loaded memory");
10355 }
10356
10357 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10358 // For 4-byte load-and-splat, we need Power9.
10359 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10360 uint64_t Offset = 0;
10361 if (IsFourByte)
10362 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10363 else
10364 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10365
10366 // If the width of the load is the same as the width of the splat,
10367 // loading with an offset would load the wrong memory.
10368 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10369 Offset = 0;
10370
10371 SDValue BasePtr = LD->getBasePtr();
10372 if (Offset != 0)
 10373        BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
 10374                              BasePtr, DAG.getIntPtrConstant(Offset, dl));
10375 SDValue Ops[] = {
10376 LD->getChain(), // Chain
10377 BasePtr, // BasePtr
10378 DAG.getValueType(Op.getValueType()) // VT
10379 };
10380 SDVTList VTL =
10381 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10382 SDValue LdSplt =
10383 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10384 Ops, LD->getMemoryVT(), LD->getMemOperand());
10385 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10386 if (LdSplt.getValueType() != SVOp->getValueType(0))
10387 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10388 return LdSplt;
10389 }
10390 }
10391
10392 // All v2i64 and v2f64 shuffles are legal
10393 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10394 return Op;
10395
10396 if (Subtarget.hasP9Vector() &&
10397 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10398 isLittleEndian)) {
10399 if (V2.isUndef())
10400 V2 = V1;
10401 else if (Swap)
10402 std::swap(V1, V2);
10403 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10404 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10405 if (ShiftElts) {
10406 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10407 DAG.getConstant(ShiftElts, dl, MVT::i32));
10408 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10409 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10410 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10411 }
10412 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10413 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10414 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10415 }
10416
10417 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10418 SDValue SplatInsertNode;
10419 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10420 return SplatInsertNode;
10421 }
10422
10423 if (Subtarget.hasP9Altivec()) {
10424 SDValue NewISDNode;
10425 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10426 return NewISDNode;
10427
10428 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10429 return NewISDNode;
10430 }
10431
10432 if (Subtarget.hasVSX() &&
10433 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10434 if (Swap)
10435 std::swap(V1, V2);
10436 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10437 SDValue Conv2 =
10438 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10439
10440 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10441 DAG.getConstant(ShiftElts, dl, MVT::i32));
10442 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10443 }
10444
10445 if (Subtarget.hasVSX() &&
10446 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10447 if (Swap)
10448 std::swap(V1, V2);
10449 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10450 SDValue Conv2 =
10451 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10452
10453 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10454 DAG.getConstant(ShiftElts, dl, MVT::i32));
10455 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10456 }
10457
10458 if (Subtarget.hasP9Vector()) {
10459 if (PPC::isXXBRHShuffleMask(SVOp)) {
10460 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10461 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10462 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10463 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10464 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10465 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10466 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10467 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10468 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10469 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10470 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10471 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10472 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10473 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10474 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10475 }
10476 }
10477
10478 if (Subtarget.hasVSX()) {
10479 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10480 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10481
10482 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10483 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10484 DAG.getConstant(SplatIdx, dl, MVT::i32));
10485 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10486 }
10487
10488 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10489 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10490 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10491 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10492 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10493 }
10494 }
10495
10496 // Cases that are handled by instructions that take permute immediates
10497 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10498 // selected by the instruction selector.
10499 if (V2.isUndef()) {
10500 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10501 PPC::isSplatShuffleMask(SVOp, 2) ||
10502 PPC::isSplatShuffleMask(SVOp, 4) ||
10503 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10504 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10505 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10506 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10507 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10508 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10509 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10510 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10511 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10512 (Subtarget.hasP8Altivec() && (
10513 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10514 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10515 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10516 return Op;
10517 }
10518 }
10519
10520 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10521 // and produce a fixed permutation. If any of these match, do not lower to
10522 // VPERM.
10523 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10524 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10525 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10526 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10527 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10528 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10529 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10530 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10531 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10532 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10533 (Subtarget.hasP8Altivec() && (
10534 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10535 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10536 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10537 return Op;
10538
10539 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10540 // perfect shuffle table to emit an optimal matching sequence.
10541 ArrayRef<int> PermMask = SVOp->getMask();
10542
10543 if (!DisablePerfectShuffle && !isLittleEndian) {
10544 unsigned PFIndexes[4];
10545 bool isFourElementShuffle = true;
10546 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10547 ++i) { // Element number
10548 unsigned EltNo = 8; // Start out undef.
10549 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10550 if (PermMask[i * 4 + j] < 0)
10551 continue; // Undef, ignore it.
10552
10553 unsigned ByteSource = PermMask[i * 4 + j];
10554 if ((ByteSource & 3) != j) {
10555 isFourElementShuffle = false;
10556 break;
10557 }
10558
10559 if (EltNo == 8) {
10560 EltNo = ByteSource / 4;
10561 } else if (EltNo != ByteSource / 4) {
10562 isFourElementShuffle = false;
10563 break;
10564 }
10565 }
10566 PFIndexes[i] = EltNo;
10567 }
10568
10569 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10570 // perfect shuffle vector to determine if it is cost effective to do this as
10571 // discrete instructions, or whether we should use a vperm.
10572 // For now, we skip this for little endian until such time as we have a
10573 // little-endian perfect shuffle table.
10574 if (isFourElementShuffle) {
10575 // Compute the index in the perfect shuffle table.
10576 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10577 PFIndexes[2] * 9 + PFIndexes[3];
10578
10579 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10580 unsigned Cost = (PFEntry >> 30);
10581
10582 // Determining when to avoid vperm is tricky. Many things affect the cost
10583 // of vperm, particularly how many times the perm mask needs to be
10584 // computed. For example, if the perm mask can be hoisted out of a loop or
10585 // is already used (perhaps because there are multiple permutes with the
10586 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10587 // permute mask out of the loop requires an extra register.
10588 //
10589 // As a compromise, we only emit discrete instructions if the shuffle can
10590 // be generated in 3 or fewer operations. When we have loop information
10591 // available, if this block is within a loop, we should avoid using vperm
10592 // for 3-operation perms and use a constant pool load instead.
10593 if (Cost < 3)
10594 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10595 }
10596 }
10597
10598 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10599 // vector that will get spilled to the constant pool.
10600 if (V2.isUndef()) V2 = V1;
10601
10602 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10603}
10604
10605SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10606 ArrayRef<int> PermMask, EVT VT,
10607 SDValue V1, SDValue V2) const {
10608 unsigned Opcode = PPCISD::VPERM;
10609 EVT ValType = V1.getValueType();
10610 SDLoc dl(Op);
10611 bool NeedSwap = false;
10612 bool isLittleEndian = Subtarget.isLittleEndian();
10613 bool isPPC64 = Subtarget.isPPC64();
10614
10615 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10616 (V1->hasOneUse() || V2->hasOneUse())) {
10617 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10618 "XXPERM instead\n");
10619 Opcode = PPCISD::XXPERM;
10620
 10621    // The second input to XXPERM is also an output, so if the second input has
 10622    // multiple uses a copy is necessary. As a result, we want the single-use
 10623    // operand to be used as the second input to avoid the copy.
10624 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10625 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10626 std::swap(V1, V2);
10627 NeedSwap = !NeedSwap;
10628 }
10629 }
10630
10631 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10632 // that it is in input element units, not in bytes. Convert now.
10633
10634 // For little endian, the order of the input vectors is reversed, and
10635 // the permutation mask is complemented with respect to 31. This is
10636 // necessary to produce proper semantics with the big-endian-based vperm
10637 // instruction.
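  // For example, with single-byte elements, source byte 5 becomes permute
  // control byte 31 - 5 == 26 on little-endian targets, and the two inputs
  // are swapped just before the node is created.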
10638 EVT EltVT = V1.getValueType().getVectorElementType();
10639 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10640
10641 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10642 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10643
10644 /*
10645 Vectors will be appended like so: [ V1 | v2 ]
10646 XXSWAPD on V1:
10647 [ A | B | C | D ] -> [ C | D | A | B ]
10648 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10649 i.e. index of A, B += 8, and index of C, D -= 8.
10650 XXSWAPD on V2:
10651 [ E | F | G | H ] -> [ G | H | E | F ]
10652 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10653 i.e. index of E, F += 8, index of G, H -= 8
10654 Swap V1 and V2:
10655 [ V1 | V2 ] -> [ V2 | V1 ]
10656 0-15 16-31 0-15 16-31
10657 i.e. index of V1 += 16, index of V2 -= 16
10658 */
10659
10660 SmallVector<SDValue, 16> ResultMask;
10661 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10662 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10663
10664 if (V1HasXXSWAPD) {
10665 if (SrcElt < 8)
10666 SrcElt += 8;
10667 else if (SrcElt < 16)
10668 SrcElt -= 8;
10669 }
10670 if (V2HasXXSWAPD) {
10671 if (SrcElt > 23)
10672 SrcElt -= 8;
10673 else if (SrcElt > 15)
10674 SrcElt += 8;
10675 }
10676 if (NeedSwap) {
10677 if (SrcElt < 16)
10678 SrcElt += 16;
10679 else
10680 SrcElt -= 16;
10681 }
10682 for (unsigned j = 0; j != BytesPerElement; ++j)
10683 if (isLittleEndian)
10684 ResultMask.push_back(
10685 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10686 else
10687 ResultMask.push_back(
10688 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10689 }
10690
10691 if (V1HasXXSWAPD) {
10692 dl = SDLoc(V1->getOperand(0));
10693 V1 = V1->getOperand(0)->getOperand(1);
10694 }
10695 if (V2HasXXSWAPD) {
10696 dl = SDLoc(V2->getOperand(0));
10697 V2 = V2->getOperand(0)->getOperand(1);
10698 }
10699
10700 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10701 if (ValType != MVT::v2f64)
10702 V1 = DAG.getBitcast(MVT::v2f64, V1);
10703 if (V2.getValueType() != MVT::v2f64)
10704 V2 = DAG.getBitcast(MVT::v2f64, V2);
10705 }
10706
10707 ShufflesHandledWithVPERM++;
10708 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10709 LLVM_DEBUG({
10710 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10711 if (Opcode == PPCISD::XXPERM) {
10712 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10713 } else {
10714 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10715 }
10716 SVOp->dump();
10717 dbgs() << "With the following permute control vector:\n";
10718 VPermMask.dump();
10719 });
10720
10721 if (Opcode == PPCISD::XXPERM)
10722 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10723
 10724  // For little endian, only the operands need to be swapped here; the permute
 10725  // mask above was already calculated accordingly.
10726 if (isLittleEndian)
10727 std::swap(V1, V2);
10728
10729 SDValue VPERMNode =
10730 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10731
10732 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10733 return VPERMNode;
10734}
10735
10736/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10737/// vector comparison. If it is, return true and fill in Opc/isDot with
10738/// information about the intrinsic.
10739static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10740 bool &isDot, const PPCSubtarget &Subtarget) {
10741 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10742 CompareOpc = -1;
10743 isDot = false;
10744 switch (IntrinsicID) {
10745 default:
10746 return false;
10747 // Comparison predicates.
10748 case Intrinsic::ppc_altivec_vcmpbfp_p:
10749 CompareOpc = 966;
10750 isDot = true;
10751 break;
10752 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10753 CompareOpc = 198;
10754 isDot = true;
10755 break;
10756 case Intrinsic::ppc_altivec_vcmpequb_p:
10757 CompareOpc = 6;
10758 isDot = true;
10759 break;
10760 case Intrinsic::ppc_altivec_vcmpequh_p:
10761 CompareOpc = 70;
10762 isDot = true;
10763 break;
10764 case Intrinsic::ppc_altivec_vcmpequw_p:
10765 CompareOpc = 134;
10766 isDot = true;
10767 break;
10768 case Intrinsic::ppc_altivec_vcmpequd_p:
10769 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10770 CompareOpc = 199;
10771 isDot = true;
10772 } else
10773 return false;
10774 break;
10775 case Intrinsic::ppc_altivec_vcmpneb_p:
10776 case Intrinsic::ppc_altivec_vcmpneh_p:
10777 case Intrinsic::ppc_altivec_vcmpnew_p:
10778 case Intrinsic::ppc_altivec_vcmpnezb_p:
10779 case Intrinsic::ppc_altivec_vcmpnezh_p:
10780 case Intrinsic::ppc_altivec_vcmpnezw_p:
10781 if (Subtarget.hasP9Altivec()) {
10782 switch (IntrinsicID) {
10783 default:
10784 llvm_unreachable("Unknown comparison intrinsic.");
10785 case Intrinsic::ppc_altivec_vcmpneb_p:
10786 CompareOpc = 7;
10787 break;
10788 case Intrinsic::ppc_altivec_vcmpneh_p:
10789 CompareOpc = 71;
10790 break;
10791 case Intrinsic::ppc_altivec_vcmpnew_p:
10792 CompareOpc = 135;
10793 break;
10794 case Intrinsic::ppc_altivec_vcmpnezb_p:
10795 CompareOpc = 263;
10796 break;
10797 case Intrinsic::ppc_altivec_vcmpnezh_p:
10798 CompareOpc = 327;
10799 break;
10800 case Intrinsic::ppc_altivec_vcmpnezw_p:
10801 CompareOpc = 391;
10802 break;
10803 }
10804 isDot = true;
10805 } else
10806 return false;
10807 break;
10808 case Intrinsic::ppc_altivec_vcmpgefp_p:
10809 CompareOpc = 454;
10810 isDot = true;
10811 break;
10812 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10813 CompareOpc = 710;
10814 isDot = true;
10815 break;
10816 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10817 CompareOpc = 774;
10818 isDot = true;
10819 break;
10820 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10821 CompareOpc = 838;
10822 isDot = true;
10823 break;
10824 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10825 CompareOpc = 902;
10826 isDot = true;
10827 break;
10828 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10829 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10830 CompareOpc = 967;
10831 isDot = true;
10832 } else
10833 return false;
10834 break;
10835 case Intrinsic::ppc_altivec_vcmpgtub_p:
10836 CompareOpc = 518;
10837 isDot = true;
10838 break;
10839 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10840 CompareOpc = 582;
10841 isDot = true;
10842 break;
10843 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10844 CompareOpc = 646;
10845 isDot = true;
10846 break;
10847 case Intrinsic::ppc_altivec_vcmpgtud_p:
10848 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10849 CompareOpc = 711;
10850 isDot = true;
10851 } else
10852 return false;
10853 break;
10854
10855 case Intrinsic::ppc_altivec_vcmpequq:
10856 case Intrinsic::ppc_altivec_vcmpgtsq:
10857 case Intrinsic::ppc_altivec_vcmpgtuq:
10858 if (!Subtarget.isISA3_1())
10859 return false;
10860 switch (IntrinsicID) {
10861 default:
10862 llvm_unreachable("Unknown comparison intrinsic.");
10863 case Intrinsic::ppc_altivec_vcmpequq:
10864 CompareOpc = 455;
10865 break;
10866 case Intrinsic::ppc_altivec_vcmpgtsq:
10867 CompareOpc = 903;
10868 break;
10869 case Intrinsic::ppc_altivec_vcmpgtuq:
10870 CompareOpc = 647;
10871 break;
10872 }
10873 break;
10874
10875 // VSX predicate comparisons use the same infrastructure
10876 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10877 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10878 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10879 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10880 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10881 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10882 if (Subtarget.hasVSX()) {
10883 switch (IntrinsicID) {
10884 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10885 CompareOpc = 99;
10886 break;
10887 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10888 CompareOpc = 115;
10889 break;
10890 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10891 CompareOpc = 107;
10892 break;
10893 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10894 CompareOpc = 67;
10895 break;
10896 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10897 CompareOpc = 83;
10898 break;
10899 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10900 CompareOpc = 75;
10901 break;
10902 }
10903 isDot = true;
10904 } else
10905 return false;
10906 break;
10907
10908 // Normal Comparisons.
10909 case Intrinsic::ppc_altivec_vcmpbfp:
10910 CompareOpc = 966;
10911 break;
10912 case Intrinsic::ppc_altivec_vcmpeqfp:
10913 CompareOpc = 198;
10914 break;
10915 case Intrinsic::ppc_altivec_vcmpequb:
10916 CompareOpc = 6;
10917 break;
10918 case Intrinsic::ppc_altivec_vcmpequh:
10919 CompareOpc = 70;
10920 break;
10921 case Intrinsic::ppc_altivec_vcmpequw:
10922 CompareOpc = 134;
10923 break;
10924 case Intrinsic::ppc_altivec_vcmpequd:
10925 if (Subtarget.hasP8Altivec())
10926 CompareOpc = 199;
10927 else
10928 return false;
10929 break;
10930 case Intrinsic::ppc_altivec_vcmpneb:
10931 case Intrinsic::ppc_altivec_vcmpneh:
10932 case Intrinsic::ppc_altivec_vcmpnew:
10933 case Intrinsic::ppc_altivec_vcmpnezb:
10934 case Intrinsic::ppc_altivec_vcmpnezh:
10935 case Intrinsic::ppc_altivec_vcmpnezw:
10936 if (Subtarget.hasP9Altivec())
10937 switch (IntrinsicID) {
10938 default:
10939 llvm_unreachable("Unknown comparison intrinsic.");
10940 case Intrinsic::ppc_altivec_vcmpneb:
10941 CompareOpc = 7;
10942 break;
10943 case Intrinsic::ppc_altivec_vcmpneh:
10944 CompareOpc = 71;
10945 break;
10946 case Intrinsic::ppc_altivec_vcmpnew:
10947 CompareOpc = 135;
10948 break;
10949 case Intrinsic::ppc_altivec_vcmpnezb:
10950 CompareOpc = 263;
10951 break;
10952 case Intrinsic::ppc_altivec_vcmpnezh:
10953 CompareOpc = 327;
10954 break;
10955 case Intrinsic::ppc_altivec_vcmpnezw:
10956 CompareOpc = 391;
10957 break;
10958 }
10959 else
10960 return false;
10961 break;
10962 case Intrinsic::ppc_altivec_vcmpgefp:
10963 CompareOpc = 454;
10964 break;
10965 case Intrinsic::ppc_altivec_vcmpgtfp:
10966 CompareOpc = 710;
10967 break;
10968 case Intrinsic::ppc_altivec_vcmpgtsb:
10969 CompareOpc = 774;
10970 break;
10971 case Intrinsic::ppc_altivec_vcmpgtsh:
10972 CompareOpc = 838;
10973 break;
10974 case Intrinsic::ppc_altivec_vcmpgtsw:
10975 CompareOpc = 902;
10976 break;
10977 case Intrinsic::ppc_altivec_vcmpgtsd:
10978 if (Subtarget.hasP8Altivec())
10979 CompareOpc = 967;
10980 else
10981 return false;
10982 break;
10983 case Intrinsic::ppc_altivec_vcmpgtub:
10984 CompareOpc = 518;
10985 break;
10986 case Intrinsic::ppc_altivec_vcmpgtuh:
10987 CompareOpc = 582;
10988 break;
10989 case Intrinsic::ppc_altivec_vcmpgtuw:
10990 CompareOpc = 646;
10991 break;
10992 case Intrinsic::ppc_altivec_vcmpgtud:
10993 if (Subtarget.hasP8Altivec())
10994 CompareOpc = 711;
10995 else
10996 return false;
10997 break;
10998 case Intrinsic::ppc_altivec_vcmpequq_p:
10999 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11000 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11001 if (!Subtarget.isISA3_1())
11002 return false;
11003 switch (IntrinsicID) {
11004 default:
11005 llvm_unreachable("Unknown comparison intrinsic.");
11006 case Intrinsic::ppc_altivec_vcmpequq_p:
11007 CompareOpc = 455;
11008 break;
11009 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11010 CompareOpc = 903;
11011 break;
11012 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11013 CompareOpc = 647;
11014 break;
11015 }
11016 isDot = true;
11017 break;
11018 }
11019 return true;
11020}
11021
11022/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11023/// lower, do it, otherwise return null.
11024SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11025 SelectionDAG &DAG) const {
11026 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11027
11028 SDLoc dl(Op);
11029 // Note: BCD instructions expect the immediate operand in vector form (v4i32),
11030 // but the builtin provides it as a scalar. To satisfy the instruction
11031 // encoding, we splat the scalar across all lanes using SPLAT_VECTOR.
11032 auto MapNodeWithSplatVector =
11033 [&](unsigned Opcode,
11034 std::initializer_list<SDValue> ExtraOps = {}) -> SDValue {
11035 SDValue SplatVal =
11036 DAG.getNode(ISD::SPLAT_VECTOR, dl, MVT::v4i32, Op.getOperand(2));
11037
11038 SmallVector<SDValue, 4> Ops{SplatVal, Op.getOperand(1)};
11039 Ops.append(ExtraOps.begin(), ExtraOps.end());
11040 return DAG.getNode(Opcode, dl, MVT::v16i8, Ops);
11041 };
11042
11043 switch (IntrinsicID) {
11044 case Intrinsic::thread_pointer:
11045 // Reads the thread pointer register, used for __builtin_thread_pointer.
11046 if (Subtarget.isPPC64())
11047 return DAG.getRegister(PPC::X13, MVT::i64);
11048 return DAG.getRegister(PPC::R2, MVT::i32);
11049
11050 case Intrinsic::ppc_rldimi: {
11051 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11052 SDValue Src = Op.getOperand(1);
11053 APInt Mask = Op.getConstantOperandAPInt(4);
11054 if (Mask.isZero())
11055 return Op.getOperand(2);
11056 if (Mask.isAllOnes())
11057 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11058 uint64_t SH = Op.getConstantOperandVal(3);
11059 unsigned MB = 0, ME = 0;
11060 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11061 report_fatal_error("invalid rldimi mask!");
11062 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
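// For example, with SH=8 and a mask of 0xFF00000000000000 (MB=0, ME=7 in
// big-endian bit numbering), ME != 63-SH, so the source is pre-rotated by
// ME+SH+1 = 16; the final RLDIMI then rotates by 63-ME = 56, for a total
// rotation of 72, which is 8 mod 64 and thus matches the requested shift.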
11063 if (ME < 63 - SH) {
11064 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11065 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11066 } else if (ME > 63 - SH) {
11067 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11068 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11069 }
11070 return SDValue(
11071 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11072 {Op.getOperand(2), Src,
11073 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11074 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11075 0);
11076 }
11077
11078 case Intrinsic::ppc_rlwimi: {
11079 APInt Mask = Op.getConstantOperandAPInt(4);
11080 if (Mask.isZero())
11081 return Op.getOperand(2);
11082 if (Mask.isAllOnes())
11083 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11084 Op.getOperand(3));
11085 unsigned MB = 0, ME = 0;
11086 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11087 report_fatal_error("invalid rlwimi mask!");
11088 return SDValue(DAG.getMachineNode(
11089 PPC::RLWIMI, dl, MVT::i32,
11090 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11091 DAG.getTargetConstant(MB, dl, MVT::i32),
11092 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11093 0);
11094 }
11095
11096 case Intrinsic::ppc_bcdshift:
11097 return MapNodeWithSplatVector(PPCISD::BCDSHIFT, {Op.getOperand(3)});
11098 case Intrinsic::ppc_bcdshiftround:
11099 return MapNodeWithSplatVector(PPCISD::BCDSHIFTROUND, {Op.getOperand(3)});
11100 case Intrinsic::ppc_bcdtruncate:
11101 return MapNodeWithSplatVector(PPCISD::BCDTRUNC, {Op.getOperand(3)});
11102 case Intrinsic::ppc_bcdunsignedtruncate:
11103 return MapNodeWithSplatVector(PPCISD::BCDUTRUNC);
11104 case Intrinsic::ppc_bcdunsignedshift:
11105 return MapNodeWithSplatVector(PPCISD::BCDUSHIFT);
11106
11107 case Intrinsic::ppc_rlwnm: {
11108 if (Op.getConstantOperandVal(3) == 0)
11109 return DAG.getConstant(0, dl, MVT::i32);
11110 unsigned MB = 0, ME = 0;
11111 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11112 report_fatal_error("invalid rlwnm mask!");
11113 return SDValue(
11114 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11115 {Op.getOperand(1), Op.getOperand(2),
11116 DAG.getTargetConstant(MB, dl, MVT::i32),
11117 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11118 0);
11119 }
11120
11121 case Intrinsic::ppc_mma_disassemble_acc: {
11122 if (Subtarget.isISAFuture()) {
11123 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11124 SDValue WideVec =
11125 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11126 Op.getOperand(1)),
11127 0);
11128 SmallVector<SDValue, 4> RetOps;
11129 SDValue Value = SDValue(WideVec.getNode(), 0);
11130 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11131
11132 SDValue Extract;
11133 Extract = DAG.getNode(
11134 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11135 Subtarget.isLittleEndian() ? Value2 : Value,
11136 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11137 dl, getPointerTy(DAG.getDataLayout())));
11138 RetOps.push_back(Extract);
11139 Extract = DAG.getNode(
11140 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11141 Subtarget.isLittleEndian() ? Value2 : Value,
11142 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11143 dl, getPointerTy(DAG.getDataLayout())));
11144 RetOps.push_back(Extract);
11145 Extract = DAG.getNode(
11146 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11147 Subtarget.isLittleEndian() ? Value : Value2,
11148 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11149 dl, getPointerTy(DAG.getDataLayout())));
11150 RetOps.push_back(Extract);
11151 Extract = DAG.getNode(
11152 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11153 Subtarget.isLittleEndian() ? Value : Value2,
11154 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11155 dl, getPointerTy(DAG.getDataLayout())));
11156 RetOps.push_back(Extract);
11157 return DAG.getMergeValues(RetOps, dl);
11158 }
11159 [[fallthrough]];
11160 }
11161 case Intrinsic::ppc_vsx_disassemble_pair: {
11162 int NumVecs = 2;
11163 SDValue WideVec = Op.getOperand(1);
11164 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11165 NumVecs = 4;
11166 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11167 }
11168 SmallVector<SDValue, 4> RetOps;
11169 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11170 SDValue Extract = DAG.getNode(
11171 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11172 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11173 : VecNo,
11174 dl, getPointerTy(DAG.getDataLayout())));
11175 RetOps.push_back(Extract);
11176 }
11177 return DAG.getMergeValues(RetOps, dl);
11178 }
11179
11180 case Intrinsic::ppc_mma_build_dmr: {
11181 SmallVector<SDValue, 4> Pairs;
11182 SmallVector<SDValue, 8> Chains;
11183 for (int i = 1; i < 9; i += 2) {
11184 SDValue Hi = Op.getOperand(i);
11185 SDValue Lo = Op.getOperand(i + 1);
11186 if (Hi->getOpcode() == ISD::LOAD)
11187 Chains.push_back(Hi.getValue(1));
11188 if (Lo->getOpcode() == ISD::LOAD)
11189 Chains.push_back(Lo.getValue(1));
11190 Pairs.push_back(
11191 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11192 }
11193 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11194 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11195 return DAG.getMergeValues({Value, TF}, dl);
11196 }
11197
11198 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11199 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11200 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11201 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11202 "Specify P of 0 or 1 for lower or upper 512 bytes");
11203 unsigned HiLo = Idx->getSExtValue();
11204 unsigned Opcode;
11205 unsigned Subx;
11206 if (HiLo == 0) {
11207 Opcode = PPC::DMXXEXTFDMR512;
11208 Subx = PPC::sub_wacc_lo;
11209 } else {
11210 Opcode = PPC::DMXXEXTFDMR512_HI;
11211 Subx = PPC::sub_wacc_hi;
11212 }
11213 SDValue Subreg(
11214 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11215 Op.getOperand(1),
11216 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11217 0);
11218 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11219 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11220 }
11221
11222 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11223 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11224 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11225 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11226 "Specify a dmr row pair 0-3");
11227 unsigned IdxVal = Idx->getSExtValue();
11228 unsigned Subx;
11229 switch (IdxVal) {
11230 case 0:
11231 Subx = PPC::sub_dmrrowp0;
11232 break;
11233 case 1:
11234 Subx = PPC::sub_dmrrowp1;
11235 break;
11236 case 2:
11237 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11238 break;
11239 case 3:
11240 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11241 break;
11242 }
11243 SDValue Subreg(
11244 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11245 Op.getOperand(1),
11246 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11247 0);
11248 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11249 return SDValue(
11250 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11251 0);
11252 }
11253
11254 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11255 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11256 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11257 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11258 "Specify P of 0 or 1 for lower or upper 512 bytes");
11259 unsigned HiLo = Idx->getSExtValue();
11260 unsigned Opcode;
11261 unsigned Subx;
11262 if (HiLo == 0) {
11263 Opcode = PPCISD::INST512;
11264 Subx = PPC::sub_wacc_lo;
11265 } else {
11266 Opcode = PPCISD::INST512HI;
11267 Subx = PPC::sub_wacc_hi;
11268 }
11269 SDValue Wacc = DAG.getNode(Opcode, dl, MVT::v512i1, Op.getOperand(2),
11270 Op.getOperand(3));
11271 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11272 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11273 Op.getOperand(1), Wacc, SubReg),
11274 0);
11275 }
11276
11277 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11278 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11279 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11280 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11281 "Specify a dmr row pair 0-3");
11282 unsigned IdxVal = Idx->getSExtValue();
11283 unsigned Subx;
11284 switch (IdxVal) {
11285 case 0:
11286 Subx = PPC::sub_dmrrowp0;
11287 break;
11288 case 1:
11289 Subx = PPC::sub_dmrrowp1;
11290 break;
11291 case 2:
11292 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11293 break;
11294 case 3:
11295 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11296 break;
11297 }
11298 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11299 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11300 SDValue DMRRowp =
11301 DAG.getNode(PPCISD::INST256, dl, MVT::v256i1, Op.getOperand(2), P);
11302 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11303 Op.getOperand(1), DMRRowp, SubReg),
11304 0);
11305 }
11306
11307 case Intrinsic::ppc_mma_xxmfacc:
11308 case Intrinsic::ppc_mma_xxmtacc: {
11309 // Allow pre-isa-future subtargets to lower as normal.
11310 if (!Subtarget.isISAFuture())
11311 return SDValue();
11312 // The intrinsics for xxmtacc and xxmfacc take one argument of
11313 // type v512i1. For future CPUs the corresponding wacc instruction
11314 // dmxx[inst|extf]dmr512 is always generated for type v512i1, so there is
11315 // no need to produce the xxm[t|f]acc.
11316 SDValue WideVec = Op.getOperand(1);
11317 DAG.ReplaceAllUsesWith(Op, WideVec);
11318 return SDValue();
11319 }
11320
11321 case Intrinsic::ppc_unpack_longdouble: {
11322 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11323 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11324 "Argument of long double unpack must be 0 or 1!");
11325 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11326 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11327 Idx->getValueType(0)));
11328 }
11329
11330 case Intrinsic::ppc_compare_exp_lt:
11331 case Intrinsic::ppc_compare_exp_gt:
11332 case Intrinsic::ppc_compare_exp_eq:
11333 case Intrinsic::ppc_compare_exp_uo: {
11334 unsigned Pred;
11335 switch (IntrinsicID) {
11336 case Intrinsic::ppc_compare_exp_lt:
11337 Pred = PPC::PRED_LT;
11338 break;
11339 case Intrinsic::ppc_compare_exp_gt:
11340 Pred = PPC::PRED_GT;
11341 break;
11342 case Intrinsic::ppc_compare_exp_eq:
11343 Pred = PPC::PRED_EQ;
11344 break;
11345 case Intrinsic::ppc_compare_exp_uo:
11346 Pred = PPC::PRED_UN;
11347 break;
11348 }
11349 return SDValue(
11350 DAG.getMachineNode(
11351 PPC::SELECT_CC_I4, dl, MVT::i32,
11352 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11353 Op.getOperand(1), Op.getOperand(2)),
11354 0),
11355 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11356 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11357 0);
11358 }
11359 case Intrinsic::ppc_test_data_class: {
11360 EVT OpVT = Op.getOperand(1).getValueType();
11361 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11362 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11363 : PPC::XSTSTDCSP);
11364 return SDValue(
11365 DAG.getMachineNode(
11366 PPC::SELECT_CC_I4, dl, MVT::i32,
11367 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11368 Op.getOperand(1)),
11369 0),
11370 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11371 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11372 0);
11373 }
11374 case Intrinsic::ppc_fnmsub: {
11375 EVT VT = Op.getOperand(1).getValueType();
11376 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11377 return DAG.getNode(
11378 ISD::FNEG, dl, VT,
11379 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11380 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11381 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11382 Op.getOperand(2), Op.getOperand(3));
11383 }
11384 case Intrinsic::ppc_convert_f128_to_ppcf128:
11385 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11386 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11387 ? RTLIB::CONVERT_PPCF128_F128
11388 : RTLIB::CONVERT_F128_PPCF128;
11389 MakeLibCallOptions CallOptions;
11390 std::pair<SDValue, SDValue> Result =
11391 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11392 dl, SDValue());
11393 return Result.first;
11394 }
11395 case Intrinsic::ppc_maxfe:
11396 case Intrinsic::ppc_maxfl:
11397 case Intrinsic::ppc_maxfs:
11398 case Intrinsic::ppc_minfe:
11399 case Intrinsic::ppc_minfl:
11400 case Intrinsic::ppc_minfs: {
11401 EVT VT = Op.getValueType();
11402 assert(
11403 all_of(Op->ops().drop_front(4),
11404 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11405 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11406 (void)VT;
11407 ISD::CondCode CC = ISD::SETGT;
11408 if (IntrinsicID == Intrinsic::ppc_minfe ||
11409 IntrinsicID == Intrinsic::ppc_minfl ||
11410 IntrinsicID == Intrinsic::ppc_minfs)
11411 CC = ISD::SETLT;
11412 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11413 SDValue Res = Op.getOperand(I);
11414 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11415 Res =
11416 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11417 }
11418 return Res;
11419 }
11420 }
11421
11422 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11423 // opcode number of the comparison.
11424 int CompareOpc;
11425 bool isDot;
11426 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11427 return SDValue(); // Don't custom lower most intrinsics.
11428
11429 // If this is a non-dot comparison, make the VCMP node and we are done.
11430 if (!isDot) {
11431 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11432 Op.getOperand(1), Op.getOperand(2),
11433 DAG.getConstant(CompareOpc, dl, MVT::i32));
11434 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11435 }
11436
11437 // Create the PPCISD altivec 'dot' comparison node.
11438 SDValue Ops[] = {
11439 Op.getOperand(2), // LHS
11440 Op.getOperand(3), // RHS
11441 DAG.getConstant(CompareOpc, dl, MVT::i32)
11442 };
11443 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11444 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11445
11446 // Unpack the result based on how the target uses it.
11447 unsigned BitNo; // Bit # of CR6.
11448 bool InvertBit; // Invert result?
11449 unsigned Bitx;
11450 unsigned SetOp;
11451 switch (Op.getConstantOperandVal(1)) {
11452 default: // Can't happen, don't crash on invalid number though.
11453 case 0: // Return the value of the EQ bit of CR6.
11454 BitNo = 0;
11455 InvertBit = false;
11456 Bitx = PPC::sub_eq;
11457 SetOp = PPCISD::SETBC;
11458 break;
11459 case 1: // Return the inverted value of the EQ bit of CR6.
11460 BitNo = 0;
11461 InvertBit = true;
11462 Bitx = PPC::sub_eq;
11463 SetOp = PPCISD::SETBCR;
11464 break;
11465 case 2: // Return the value of the LT bit of CR6.
11466 BitNo = 2;
11467 InvertBit = false;
11468 Bitx = PPC::sub_lt;
11469 SetOp = PPCISD::SETBC;
11470 break;
11471 case 3: // Return the inverted value of the LT bit of CR6.
11472 BitNo = 2;
11473 InvertBit = true;
11474 Bitx = PPC::sub_lt;
11475 SetOp = PPCISD::SETBCR;
11476 break;
11477 }
11478
11479 SDValue GlueOp = CompNode.getValue(1);
11480 if (Subtarget.isISA3_1()) {
11481 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11482 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11483 SDValue CRBit =
11484 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11485 CR6Reg, SubRegIdx, GlueOp),
11486 0);
11487 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11488 }
11489
11490 // Now that we have the comparison, emit a copy from the CR to a GPR.
11491 // This is flagged to the above dot comparison.
11492 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11493 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11494
11495 // Shift the bit into the low position.
11496 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11497 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11498 // Isolate the bit.
11499 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11500 DAG.getConstant(1, dl, MVT::i32));
11501
11502 // If we are supposed to, toggle the bit.
11503 if (InvertBit)
11504 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11505 DAG.getConstant(1, dl, MVT::i32));
11506 return Flags;
11507}
11508
11509SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11510 SelectionDAG &DAG) const {
11511 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11512 // the beginning of the argument list.
11513 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11514 SDLoc DL(Op);
11515 switch (Op.getConstantOperandVal(ArgStart)) {
11516 case Intrinsic::ppc_cfence: {
11517 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11518 SDValue Val = Op.getOperand(ArgStart + 1);
11519 EVT Ty = Val.getValueType();
11520 if (Ty == MVT::i128) {
11521 // FIXME: Testing one of two paired registers is sufficient to guarantee
11522 // ordering?
11523 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11524 }
11525 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11526 return SDValue(
11527 DAG.getMachineNode(
11528 Opcode, DL, MVT::Other,
11529 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11530 Op.getOperand(0)),
11531 0);
11532 }
11533 case Intrinsic::ppc_mma_disassemble_dmr: {
11534 return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
11535 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11536 }
11537 case Intrinsic::ppc_amo_stwat:
11538 case Intrinsic::ppc_amo_stdat: {
11539 SDLoc dl(Op);
11540 SDValue Chain = Op.getOperand(0);
11541 SDValue Ptr = Op.getOperand(ArgStart + 1);
11542 SDValue Val = Op.getOperand(ArgStart + 2);
11543 SDValue FC = Op.getOperand(ArgStart + 3);
11544
11545 return DAG.getNode(PPCISD::STAT, dl, MVT::Other, Chain, Val, Ptr, FC);
11546 }
11547 default:
11548 break;
11549 }
11550 return SDValue();
11551}
11552
11553// Lower scalar BSWAP64 to xxbrd.
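// The value is splatted into both doublewords of a VSX register (MTVSRDD),
// byte-reversed as v2i64 (XXBRD), and the reversed doubleword is moved back
// to a GPR (MFVSRD); the element index to extract differs between LE and BE.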
11554SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11555 SDLoc dl(Op);
11556 if (!Subtarget.isPPC64())
11557 return Op;
11558 // MTVSRDD
11559 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11560 Op.getOperand(0));
11561 // XXBRD
11562 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11563 // MFVSRD
11564 int VectorIndex = 0;
11565 if (Subtarget.isLittleEndian())
11566 VectorIndex = 1;
11567 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11568 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11569 return Op;
11570}
11571
11572// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11573// compared to a value that is atomically loaded (atomic loads zero-extend).
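// For example, an i8 cmpxchg whose expected value arrives sign-extended as
// 0xFFFFFF80 would never match the zero-extended loaded value 0x00000080
// unless the compare operand is first masked down to its low 8 bits.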
11574SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11575 SelectionDAG &DAG) const {
11576 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11577 "Expecting an atomic compare-and-swap here.");
11578 SDLoc dl(Op);
11579 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11580 EVT MemVT = AtomicNode->getMemoryVT();
11581 if (MemVT.getSizeInBits() >= 32)
11582 return Op;
11583
11584 SDValue CmpOp = Op.getOperand(2);
11585 // If this is already correctly zero-extended, leave it alone.
11586 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11587 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11588 return Op;
11589
11590 // Clear the high bits of the compare operand.
11591 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11592 SDValue NewCmpOp =
11593 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11594 DAG.getConstant(MaskVal, dl, MVT::i32));
11595
11596 // Replace the existing compare operand with the properly zero-extended one.
11597 SmallVector<SDValue, 4> Ops;
11598 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11599 Ops.push_back(AtomicNode->getOperand(i));
11600 Ops[2] = NewCmpOp;
11601 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11602 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11603 auto NodeTy =
11604 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11605 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11606}
11607
11608SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11609 SelectionDAG &DAG) const {
11610 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11611 EVT MemVT = N->getMemoryVT();
11612 assert(MemVT.getSimpleVT() == MVT::i128 &&
11613 "Expect quadword atomic operations");
11614 SDLoc dl(N);
11615 unsigned Opc = N->getOpcode();
11616 switch (Opc) {
11617 case ISD::ATOMIC_LOAD: {
11618 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11619 // lowered to ppc instructions by the pattern-matching instruction selector.
11620 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11621 SmallVector<SDValue, 4> Ops{
11622 N->getOperand(0),
11623 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11624 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11625 Ops.push_back(N->getOperand(I));
11626 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11627 Ops, MemVT, N->getMemOperand());
11628 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11629 SDValue ValHi =
11630 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11631 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11632 DAG.getConstant(64, dl, MVT::i32));
11633 SDValue Val =
11634 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11635 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11636 {Val, LoadedVal.getValue(2)});
11637 }
11638 case ISD::ATOMIC_STORE: {
11639 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11640 // lowered to ppc instructions by the pattern-matching instruction selector.
11641 SDVTList Tys = DAG.getVTList(MVT::Other);
11642 SmallVector<SDValue, 4> Ops{
11643 N->getOperand(0),
11644 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11645 SDValue Val = N->getOperand(1);
11646 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11647 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11648 DAG.getConstant(64, dl, MVT::i32));
11649 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11650 Ops.push_back(ValLo);
11651 Ops.push_back(ValHi);
11652 Ops.push_back(N->getOperand(2));
11653 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11654 N->getMemOperand());
11655 }
11656 default:
11657 llvm_unreachable("Unexpected atomic opcode");
11658 }
11659}
11660
11661 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11662 SelectionDAG &DAG,
11663 const PPCSubtarget &Subtarget) {
11664 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11665
11666 enum DataClassMask {
11667 DC_NAN = 1 << 6,
11668 DC_NEG_INF = 1 << 4,
11669 DC_POS_INF = 1 << 5,
11670 DC_NEG_ZERO = 1 << 2,
11671 DC_POS_ZERO = 1 << 3,
11672 DC_NEG_SUBNORM = 1,
11673 DC_POS_SUBNORM = 1 << 1,
11674 };
11675
11676 EVT VT = Op.getValueType();
11677
11678 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11679 : VT == MVT::f64 ? PPC::XSTSTDCDP
11680 : PPC::XSTSTDCSP;
11681
11682 if (Mask == fcAllFlags)
11683 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11684 if (Mask == 0)
11685 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11686
11687 // When it's cheaper or necessary to test reverse flags.
11688 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11689 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11690 return DAG.getNOT(Dl, Rev, MVT::i1);
11691 }
11692
11693 // Power doesn't support testing whether a value is 'normal'. Test the rest
11694 // first, and test if it's 'not not-normal' with expected sign.
11695 if (Mask & fcNormal) {
11696 SDValue Rev(DAG.getMachineNode(
11697 TestOp, Dl, MVT::i32,
11698 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11699 DC_NEG_ZERO | DC_POS_ZERO |
11700 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11701 Dl, MVT::i32),
11702 Op),
11703 0);
11704 // The sign is stored in CR bit 0, the result in CR bit 2.
11705 SDValue Sign(
11706 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11707 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11708 0);
11709 SDValue Normal(DAG.getNOT(
11710 Dl,
11711 SDValue(DAG.getMachineNode(
11712 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11713 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11714 0),
11715 MVT::i1));
11716 if (Mask & fcPosNormal)
11717 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11718 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11719 if (Mask == fcPosNormal || Mask == fcNegNormal)
11720 return Result;
11721
11722 return DAG.getNode(
11723 ISD::OR, Dl, MVT::i1,
11724 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11725 }
11726
11727 // The instruction doesn't differentiate between signaling and quiet NaNs. Test
11728 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11729 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11730 bool IsQuiet = Mask & fcQNan;
11731 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11732
11733 // Quietness is determined by the first bit in fraction field.
11734 uint64_t QuietMask = 0;
11735 SDValue HighWord;
11736 if (VT == MVT::f128) {
11737 HighWord = DAG.getNode(
11738 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11739 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11740 QuietMask = 0x8000;
11741 } else if (VT == MVT::f64) {
11742 if (Subtarget.isPPC64()) {
11743 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11744 DAG.getBitcast(MVT::i64, Op),
11745 DAG.getConstant(1, Dl, MVT::i32));
11746 } else {
11747 SDValue Vec = DAG.getBitcast(
11748 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11749 HighWord = DAG.getNode(
11750 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11751 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11752 }
11753 QuietMask = 0x80000;
11754 } else if (VT == MVT::f32) {
11755 HighWord = DAG.getBitcast(MVT::i32, Op);
11756 QuietMask = 0x400000;
11757 }
11758 SDValue NanRes = DAG.getSetCC(
11759 Dl, MVT::i1,
11760 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11761 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11762 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11763 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11764 if (Mask == fcQNan || Mask == fcSNan)
11765 return NanRes;
11766
11767 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11768 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11769 NanRes);
11770 }
11771
11772 unsigned NativeMask = 0;
11773 if ((Mask & fcNan) == fcNan)
11774 NativeMask |= DC_NAN;
11775 if (Mask & fcNegInf)
11776 NativeMask |= DC_NEG_INF;
11777 if (Mask & fcPosInf)
11778 NativeMask |= DC_POS_INF;
11779 if (Mask & fcNegZero)
11780 NativeMask |= DC_NEG_ZERO;
11781 if (Mask & fcPosZero)
11782 NativeMask |= DC_POS_ZERO;
11783 if (Mask & fcNegSubnormal)
11784 NativeMask |= DC_NEG_SUBNORM;
11785 if (Mask & fcPosSubnormal)
11786 NativeMask |= DC_POS_SUBNORM;
11787 return SDValue(
11788 DAG.getMachineNode(
11789 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11790 SDValue(DAG.getMachineNode(
11791 TestOp, Dl, MVT::i32,
11792 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11793 0),
11794 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11795 0);
11796}
11797
11798SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11799 SelectionDAG &DAG) const {
11800 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11801 SDValue LHS = Op.getOperand(0);
11802 uint64_t RHSC = Op.getConstantOperandVal(1);
11803 SDLoc Dl(Op);
11804 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11805 if (LHS.getValueType() == MVT::ppcf128) {
11806 // The higher part determines the value class.
11807 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11808 DAG.getConstant(1, Dl, MVT::i32));
11809 }
11810
11811 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11812}
11813
11814// Adjust the length value for a load/store with length to account for the
11815// instructions requiring a left justified length, and for non-byte element
11816// types requiring scaling by element size.
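// For example, a v8i16 load of N elements scales N by 2 (one extra shift bit
// for the element size) and, when a left-justified length is required, shifts
// the byte count into the top byte of the length register (a shift of 56 when
// the length value is i64).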
11817static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11818 SelectionDAG &DAG) {
11819 SDLoc dl(Val);
11820 EVT VT = Val->getValueType(0);
11821 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11822 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
11823 SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
11824 return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
11825}
11826
11827SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11828 auto VPLD = cast<VPLoadSDNode>(Op);
11829 bool Future = Subtarget.isISAFuture();
11830 SDLoc dl(Op);
11831 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
11832 "Mask predication not supported");
11833 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11834 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
11835 unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
11836 unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
11837 Len = AdjustLength(Len, EltBits, !Future, DAG);
11838 SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11839 VPLD->getOperand(1), Len};
11840 SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
11841 SDValue VPL =
11842 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys, Ops,
11843 VPLD->getMemoryVT(), VPLD->getMemOperand());
11844 return VPL;
11845}
11846
11847SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
11848 auto VPST = cast<VPStoreSDNode>(Op);
11849 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
11850 "Mask predication not supported");
11851 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11852 SDLoc dl(Op);
11853 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
11854 unsigned EltBits =
11855 Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
11856 bool Future = Subtarget.isISAFuture();
11857 unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
11858 Len = AdjustLength(Len, EltBits, !Future, DAG);
11859 SDValue Ops[] = {
11860 VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11861 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
11862 VPST->getOperand(2), Len};
11863 SDVTList Tys = DAG.getVTList(MVT::Other);
11864 SDValue VPS =
11865 DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
11866 VPST->getMemoryVT(), VPST->getMemOperand());
11867 return VPS;
11868}
11869
11870SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11871 SelectionDAG &DAG) const {
11872 SDLoc dl(Op);
11873
11874 MachineFunction &MF = DAG.getMachineFunction();
11875 SDValue Op0 = Op.getOperand(0);
11876 EVT ValVT = Op0.getValueType();
11877 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11878 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11879 int64_t IntVal = Op.getConstantOperandVal(0);
11880 if (IntVal >= -16 && IntVal <= 15)
11881 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11882 dl);
11883 }
11884
11885 ReuseLoadInfo RLI;
11886 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11887 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11888 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11889 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11890
11891 MachineMemOperand *MMO =
11892 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11893 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11894 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11895 SDValue Bits = DAG.getMemIntrinsicNode(
11896 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11897 MVT::i32, MMO);
11898 if (RLI.ResChain)
11899 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11900 return Bits.getValue(0);
11901 }
11902
11903 // Create a stack slot that is 16-byte aligned.
11904 MachineFrameInfo &MFI = MF.getFrameInfo();
11905 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11906 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11907 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11908
11909 SDValue Val = Op0;
11910 // P10 hardware store forwarding requires that a single store contains all
11911 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11912 // to avoid load hit store on P10 when running binaries compiled for older
11913 // processors by generating two mergeable scalar stores to forward with the
11914 // vector load.
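// In that case the scalar is shifted into the high bits of an i64 and stored
// to both the low and high halves of the 16-byte slot before the slot is
// reloaded as a vector, so the pair of adjacent stores can be merged and
// forwarded to the vector load.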
11915 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11916 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11917 ValVT.getSizeInBits() <= 64) {
11918 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11919 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11920 SDValue ShiftBy = DAG.getConstant(
11921 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11922 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11923 SDValue Plus8 =
11924 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11925 SDValue Store2 =
11926 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11927 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11928 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11929 MachinePointerInfo());
11930 }
11931
11932 // Store the input value into Value#0 of the stack slot.
11933 SDValue Store =
11934 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11935 // Load it out.
11936 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11937}
11938
11939SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11940 SelectionDAG &DAG) const {
11941 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11942 "Should only be called for ISD::INSERT_VECTOR_ELT");
11943
11944 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11945
11946 EVT VT = Op.getValueType();
11947 SDLoc dl(Op);
11948 SDValue V1 = Op.getOperand(0);
11949 SDValue V2 = Op.getOperand(1);
11950
11951 if (VT == MVT::v2f64 && C)
11952 return Op;
11953
11954 if (Subtarget.hasP9Vector()) {
11955 // An f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11956 // because on P10, it allows this specific insert_vector_elt load pattern to
11957 // utilize the refactored load and store infrastructure in order to exploit
11958 // prefixed loads.
11959 // On targets with inexpensive direct moves (Power9 and up), a
11960 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11961 // load since a single precision load will involve conversion to double
11962 // precision on the load followed by another conversion to single precision.
11963 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11964 (isa<LoadSDNode>(V2))) {
11965 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11966 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11967 SDValue InsVecElt =
11968 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11969 BitcastLoad, Op.getOperand(2));
11970 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11971 }
11972 }
11973
11974 if (Subtarget.isISA3_1()) {
11975 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11976 return SDValue();
11977 // On P10, we have legal lowering for constant and variable indices for
11978 // all vectors.
11979 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11980 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11981 return Op;
11982 }
11983
11984 // Before P10, we have legal lowering for constant indices but not for
11985 // variable ones.
11986 if (!C)
11987 return SDValue();
11988
11989 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11990 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11991 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11992 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11993 unsigned InsertAtElement = C->getZExtValue();
11994 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11995 if (Subtarget.isLittleEndian()) {
11996 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11997 }
11998 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11999 DAG.getConstant(InsertAtByte, dl, MVT::i32));
12000 }
12001 return Op;
12002}
12003
12004SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12005 SelectionDAG &DAG) const {
12006 SDLoc dl(Op);
12007 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12008 SDValue LoadChain = LN->getChain();
12009 SDValue BasePtr = LN->getBasePtr();
12010 EVT VT = Op.getValueType();
12011 bool IsV1024i1 = VT == MVT::v1024i1;
12012 bool IsV2048i1 = VT == MVT::v2048i1;
12013
12014 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12015 // Dense Math dmr pair registers, respectively.
12016 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12017 (void)IsV2048i1;
12018 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12019 "Dense Math support required.");
12020 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12021
12022 SmallVector<SDValue, 8> Loads;
12023 SmallVector<SDValue, 8> LoadChains;
12024
12025 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
12026 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12027 MachineMemOperand *MMO = LN->getMemOperand();
12028 unsigned NumVecs = VT.getSizeInBits() / 256;
12029 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12030 MachineMemOperand *NewMMO =
12031 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12032 if (Idx > 0) {
12033 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12034 DAG.getConstant(32, dl, BasePtr.getValueType()));
12035 LoadOps[2] = BasePtr;
12036 }
12037 SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12038 DAG.getVTList(MVT::v256i1, MVT::Other),
12039 LoadOps, MVT::v256i1, NewMMO);
12040 LoadChains.push_back(Ld.getValue(1));
12041 Loads.push_back(Ld);
12042 }
12043
12044 if (Subtarget.isLittleEndian()) {
12045 std::reverse(Loads.begin(), Loads.end());
12046 std::reverse(LoadChains.begin(), LoadChains.end());
12047 }
12048
12049 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12050 SDValue Lo =
12051 DAG.getNode(PPCISD::INST512, dl, MVT::v512i1, Loads[0], Loads[1]);
12052 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12053 SDValue Hi =
12054 DAG.getNode(PPCISD::INST512HI, dl, MVT::v512i1, Loads[2], Loads[3]);
12055 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12056 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12057 const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
12058
12059 SDValue Value =
12060 SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
12061
12062 if (IsV1024i1) {
12063 return DAG.getMergeValues({Value, TF}, dl);
12064 }
12065
12066 // Handle Loads for V2048i1 which represents a dmr pair.
12067 SDValue DmrPValue;
12068 SDValue Dmr1Lo =
12069 DAG.getNode(PPCISD::INST512, dl, MVT::v512i1, Loads[4], Loads[5]);
12070 SDValue Dmr1Hi =
12071 DAG.getNode(PPCISD::INST512HI, dl, MVT::v512i1, Loads[6], Loads[7]);
12072 const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
12073 SDValue Dmr1Value = SDValue(
12074 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
12075
12076 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12077 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12078
12079 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12080 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12081
12082 DmrPValue = SDValue(
12083 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12084
12085 return DAG.getMergeValues({DmrPValue, TF}, dl);
12086}
12087
12088SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12089 const SDLoc &dl,
12090 SelectionDAG &DAG) const {
12091 SDValue Lo =
12092 DAG.getNode(PPCISD::INST512, dl, MVT::v512i1, Pairs[0], Pairs[1]);
12093 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12094 SDValue Hi =
12095 DAG.getNode(PPCISD::INST512HI, dl, MVT::v512i1, Pairs[2], Pairs[3]);
12096 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12097 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12098
12099 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12100 {RC, Lo, LoSub, Hi, HiSub}),
12101 0);
12102}
12103
12104SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12105 SelectionDAG &DAG) const {
12106 SDLoc dl(Op);
12107 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12108 SDValue LoadChain = LN->getChain();
12109 SDValue BasePtr = LN->getBasePtr();
12110 EVT VT = Op.getValueType();
12111
12112 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12113 return LowerDMFVectorLoad(Op, DAG);
12114
12115 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12116 return Op;
12117
12118 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12119 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
12120 // 2 or 4 vsx registers.
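// For example, a v512i1 accumulator load becomes four v16i8 loads at offsets
// 0, 16, 32 and 48; on little-endian targets the loaded values are reversed
// before being combined by ACC_BUILD.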
12121 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12122 "Type unsupported without MMA");
12123 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12124 "Type unsupported without paired vector support");
12125 Align Alignment = LN->getAlign();
12126 SmallVector<SDValue, 4> Loads;
12127 SmallVector<SDValue, 4> LoadChains;
12128 unsigned NumVecs = VT.getSizeInBits() / 128;
12129 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12130 SDValue Load =
12131 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12132 LN->getPointerInfo().getWithOffset(Idx * 16),
12133 commonAlignment(Alignment, Idx * 16),
12134 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12135 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12136 DAG.getConstant(16, dl, BasePtr.getValueType()));
12137 Loads.push_back(Load);
12138 LoadChains.push_back(Load.getValue(1));
12139 }
12140 if (Subtarget.isLittleEndian()) {
12141 std::reverse(Loads.begin(), Loads.end());
12142 std::reverse(LoadChains.begin(), LoadChains.end());
12143 }
12144 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12145 SDValue Value =
12146 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12147 dl, VT, Loads);
12148 SDValue RetOps[] = {Value, TF};
12149 return DAG.getMergeValues(RetOps, dl);
12150}
12151
12152SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12153 SelectionDAG &DAG) const {
12154
12155 SDLoc dl(Op);
12156 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12157 SDValue StoreChain = SN->getChain();
12158 SDValue BasePtr = SN->getBasePtr();
12159 SmallVector<SDValue, 4> Values;
12160 SmallVector<SDValue, 4> Stores;
12161 EVT VT = SN->getValue().getValueType();
12162 bool IsV1024i1 = VT == MVT::v1024i1;
12163 bool IsV2048i1 = VT == MVT::v2048i1;
12164
12165 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12166 // Dense Math dmr pair registers, respectively.
12167 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12168 (void)IsV2048i1;
12169 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12170 "Dense Math support required.");
12171 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12172
12173 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12174 if (IsV1024i1) {
12175 SDValue Lo(DAG.getMachineNode(
12176 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12177 Op.getOperand(1),
12178 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12179 0);
12180 SDValue Hi(DAG.getMachineNode(
12181 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12182 Op.getOperand(1),
12183 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12184 0);
12185 MachineSDNode *ExtNode =
12186 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12187 Values.push_back(SDValue(ExtNode, 0));
12188 Values.push_back(SDValue(ExtNode, 1));
12189 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12190 Values.push_back(SDValue(ExtNode, 0));
12191 Values.push_back(SDValue(ExtNode, 1));
12192 } else {
12193 // This corresponds to v2048i1 which represents a dmr pair.
12194 SDValue Dmr0(
12195 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12196 Op.getOperand(1),
12197 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12198 0);
12199
12200 SDValue Dmr1(
12201 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12202 Op.getOperand(1),
12203 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12204 0);
12205
12206 SDValue Dmr0Lo(DAG.getMachineNode(
12207 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12208 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12209 0);
12210
12211 SDValue Dmr0Hi(DAG.getMachineNode(
12212 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12213 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12214 0);
12215
12216 SDValue Dmr1Lo(DAG.getMachineNode(
12217 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12218 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12219 0);
12220
12221 SDValue Dmr1Hi(DAG.getMachineNode(
12222 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12223 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12224 0);
12225
12226 MachineSDNode *ExtNode =
12227 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12228 Values.push_back(SDValue(ExtNode, 0));
12229 Values.push_back(SDValue(ExtNode, 1));
12230 ExtNode =
12231 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12232 Values.push_back(SDValue(ExtNode, 0));
12233 Values.push_back(SDValue(ExtNode, 1));
12234 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12235 Values.push_back(SDValue(ExtNode, 0));
12236 Values.push_back(SDValue(ExtNode, 1));
12237 ExtNode =
12238 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12239 Values.push_back(SDValue(ExtNode, 0));
12240 Values.push_back(SDValue(ExtNode, 1));
12241 }
12242
12243 if (Subtarget.isLittleEndian())
12244 std::reverse(Values.begin(), Values.end());
12245
12246 SDVTList Tys = DAG.getVTList(MVT::Other);
12247 SmallVector<SDValue, 4> Ops{
12248 StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12249 Values[0], BasePtr};
12250 MachineMemOperand *MMO = SN->getMemOperand();
12251 unsigned NumVecs = VT.getSizeInBits() / 256;
12252 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12253 MachineMemOperand *NewMMO =
12254 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12255 if (Idx > 0) {
12256 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12257 DAG.getConstant(32, dl, BasePtr.getValueType()));
12258 Ops[3] = BasePtr;
12259 }
12260 Ops[2] = Values[Idx];
12261 SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
12262 MVT::v256i1, NewMMO);
12263 Stores.push_back(St);
12264 }
12265
12266 SDValue TF = DAG.getTokenFactor(dl, Stores);
12267 return TF;
12268}
12269
12270SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12271 SelectionDAG &DAG) const {
12272 SDLoc dl(Op);
12273 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12274 SDValue StoreChain = SN->getChain();
12275 SDValue BasePtr = SN->getBasePtr();
12276 SDValue Value = SN->getValue();
12277 SDValue Value2 = SN->getValue();
12278 EVT StoreVT = Value.getValueType();
12279
12280 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12281 return LowerDMFVectorStore(Op, DAG);
12282
12283 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12284 return Op;
12285
12286 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12287 // Here we create 2 or 4 v16i8 stores to store the pair's or accumulator's
12288 // underlying registers individually.
12289 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12290 "Type unsupported without MMA");
12291 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12292 "Type unsupported without paired vector support");
12293 Align Alignment = SN->getAlign();
12294 SmallVector<SDValue, 4> Stores;
12295 unsigned NumVecs = 2;
12296 if (StoreVT == MVT::v512i1) {
12297 if (Subtarget.isISAFuture()) {
12298 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12299 MachineSDNode *ExtNode = DAG.getMachineNode(
12300 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
12301
12302 Value = SDValue(ExtNode, 0);
12303 Value2 = SDValue(ExtNode, 1);
12304 } else
12305 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
12306 NumVecs = 4;
12307 }
12308 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12309 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12310 SDValue Elt;
12311 if (Subtarget.isISAFuture()) {
12312 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12313 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
12314 Idx > 1 ? Value2 : Value,
12315 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12316 } else
12317 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
12318 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12319
12320 SDValue Store =
12321 DAG.getStore(StoreChain, dl, Elt, BasePtr,
12322 SN->getPointerInfo().getWithOffset(Idx * 16),
12323 commonAlignment(Alignment, Idx * 16),
12324 SN->getMemOperand()->getFlags(), SN->getAAInfo());
12325 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12326 DAG.getConstant(16, dl, BasePtr.getValueType()));
12327 Stores.push_back(Store);
12328 }
12329 SDValue TF = DAG.getTokenFactor(dl, Stores);
12330 return TF;
12331}
12332
12333SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12334 SDLoc dl(Op);
12335 if (Op.getValueType() == MVT::v4i32) {
12336 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12337
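// Per 32-bit lane this computes a*b mod 2^32 as
// lo16(a)*lo16(b) + ((hi16(a)*lo16(b) + lo16(a)*hi16(b)) << 16),
// using vmulouh for the low product and vmsumuhm on the halfword-swapped RHS
// (vrlw by 16) for the cross terms.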
12338 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12339 // +16 as shift amt.
12340 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12341 SDValue RHSSwap = // = vrlw RHS, 16
12342 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12343
12344 // Shrinkify inputs to v8i16.
12345 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12346 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12347 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12348
12349 // Low parts multiplied together, generating 32-bit results (we ignore the
12350 // top parts).
12351 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12352 LHS, RHS, DAG, dl, MVT::v4i32);
12353
12354 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12355 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12356 // Shift the high parts up 16 bits.
12357 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12358 Neg16, DAG, dl);
12359 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12360 } else if (Op.getValueType() == MVT::v16i8) {
12361 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12362 bool isLittleEndian = Subtarget.isLittleEndian();
12363
12364 // Multiply the even 8-bit parts, producing 16-bit sums.
12365 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12366 LHS, RHS, DAG, dl, MVT::v8i16);
12367 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12368
12369 // Multiply the odd 8-bit parts, producing 16-bit sums.
12370 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12371 LHS, RHS, DAG, dl, MVT::v8i16);
12372 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12373
12374 // Merge the results together. Because vmuleub and vmuloub are
12375 // instructions with a big-endian bias, we must reverse the
12376 // element numbering and reverse the meaning of "odd" and "even"
12377 // when generating little endian code.
12378 int Ops[16];
12379 for (unsigned i = 0; i != 8; ++i) {
12380 if (isLittleEndian) {
12381 Ops[i*2 ] = 2*i;
12382 Ops[i*2+1] = 2*i+16;
12383 } else {
12384 Ops[i*2 ] = 2*i+1;
12385 Ops[i*2+1] = 2*i+1+16;
12386 }
12387 }
12388 if (isLittleEndian)
12389 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12390 else
12391 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12392 } else {
12393 llvm_unreachable("Unknown mul to lower!");
12394 }
12395}
12396
12397SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12398 bool IsStrict = Op->isStrictFPOpcode();
12399 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12400 !Subtarget.hasP9Vector())
12401 return SDValue();
12402
12403 return Op;
12404}
12405
12406 // Custom lowering for fpext v2f32 to v2f64
12407SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12408
12409 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12410 "Should only be called for ISD::FP_EXTEND");
12411
12412 // FIXME: handle extends from half precision float vectors on P9.
12413 // We only want to custom lower an extend from v2f32 to v2f64.
12414 if (Op.getValueType() != MVT::v2f64 ||
12415 Op.getOperand(0).getValueType() != MVT::v2f32)
12416 return SDValue();
12417
12418 SDLoc dl(Op);
12419 SDValue Op0 = Op.getOperand(0);
12420
12421 switch (Op0.getOpcode()) {
12422 default:
12423 return SDValue();
12424 case ISD::EXTRACT_SUBVECTOR: {
12425 assert(Op0.getNumOperands() == 2 &&
12426 isa<ConstantSDNode>(Op0->getOperand(1)) &&
12427 "Node should have 2 operands with second one being a constant!");
12428
12429 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
12430 return SDValue();
12431
12432 // Custom lowering is only done for the high or low doubleword.
12433 int Idx = Op0.getConstantOperandVal(1);
12434 if (Idx % 2 != 0)
12435 return SDValue();
12436
12437 // Since input is v4f32, at this point Idx is either 0 or 2.
12438 // Shift to get the doubleword position we want.
12439 int DWord = Idx >> 1;
12440
12441 // High and low word positions are different on little endian.
12442 if (Subtarget.isLittleEndian())
12443 DWord ^= 0x1;
12444
12445 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
12446 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
12447 }
12448 case ISD::FADD:
12449 case ISD::FMUL:
12450 case ISD::FSUB: {
12451 SDValue NewLoad[2];
12452 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12453 // Ensure both inputs are loads.
12454 SDValue LdOp = Op0.getOperand(i);
12455 if (LdOp.getOpcode() != ISD::LOAD)
12456 return SDValue();
12457 // Generate new load node.
12458 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12459 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12460 NewLoad[i] = DAG.getMemIntrinsicNode(
12461 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12462 LD->getMemoryVT(), LD->getMemOperand());
12463 }
12464 SDValue NewOp =
12465 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12466 NewLoad[1], Op0.getNode()->getFlags());
12467 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12468 DAG.getConstant(0, dl, MVT::i32));
12469 }
12470 case ISD::LOAD: {
12471 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12472 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12473 SDValue NewLd = DAG.getMemIntrinsicNode(
12474 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12475 LD->getMemoryVT(), LD->getMemOperand());
12476 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12477 DAG.getConstant(0, dl, MVT::i32));
12478 }
12479 }
12480 llvm_unreachable("ERROR:Should return for all cases within swtich.");
12481}
12482
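// Converts a boolean carry value (0 or 1) into the CA-flag form consumed by
// ADDE/SUBE: adding the value to all-ones produces a carry-out exactly when
// the value is 1.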
12483 static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12484 SelectionDAG &DAG,
12485 const PPCSubtarget &STI) {
12486 SDLoc DL(Value);
12487 if (STI.useCRBits())
12488 Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
12489 DAG.getConstant(1, DL, SumType),
12490 DAG.getConstant(0, DL, SumType));
12491 else
12492 Value = DAG.getZExtOrTrunc(Value, DL, SumType);
12493 SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
12494 Value, DAG.getAllOnesConstant(DL, SumType));
12495 return Sum.getValue(1);
12496}
12497
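// Converts a CA flag back into a 0/1 value: an ADDE of two zeros just adds
// the incoming carry, and the result is compared against zero (or truncated)
// to produce the requested carry type.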
12498 static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12499 EVT CarryType, SelectionDAG &DAG,
12500 const PPCSubtarget &STI) {
12501 SDLoc DL(Flag);
12502 SDValue Zero = DAG.getConstant(0, DL, SumType);
12503 SDValue Carry = DAG.getNode(
12504 PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
12505 if (STI.useCRBits())
12506 return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
12507 return DAG.getZExtOrTrunc(Carry, DL, CarryType);
12508}
12509
12510SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12511
12512 SDLoc DL(Op);
12513 SDNode *N = Op.getNode();
12514 EVT VT = N->getValueType(0);
12515 EVT CarryType = N->getValueType(1);
12516 unsigned Opc = N->getOpcode();
12517 bool IsAdd = Opc == ISD::UADDO;
12518 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12519 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12520 N->getOperand(0), N->getOperand(1));
12521 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12522 DAG, Subtarget);
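// On PPC the CA flag records "no borrow" for subtractions, whereas USUBO's
// second result is the borrow itself, so the bit is inverted below for the
// subtract case.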
12523 if (!IsAdd)
12524 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12525 DAG.getConstant(1UL, DL, CarryType));
12526 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12527}
12528
12529SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12530 SelectionDAG &DAG) const {
12531 SDLoc DL(Op);
12532 SDNode *N = Op.getNode();
12533 unsigned Opc = N->getOpcode();
12534 EVT VT = N->getValueType(0);
12535 EVT CarryType = N->getValueType(1);
12536 SDValue CarryOp = N->getOperand(2);
12537 bool IsAdd = Opc == ISD::UADDO_CARRY;
12538 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12539 if (!IsAdd)
12540 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12541 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12542 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12543 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12544 Op.getOperand(0), Op.getOperand(1), CarryOp);
12545 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12546 Subtarget);
12547 if (!IsAdd)
12548 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12549 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12550 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12551}
12552
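/// Implements signed subtract with overflow detection using the rule:
/// (x xor y) & (sub xor x), where the overflow bit is extracted from the sign
/// bit: overflow happens iff the operands have different signs and the
/// result's sign differs from the minuend's.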
12553SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12554
12555 SDLoc dl(Op);
12556 SDValue LHS = Op.getOperand(0);
12557 SDValue RHS = Op.getOperand(1);
12558 EVT VT = Op.getNode()->getValueType(0);
12559
12560 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12561
12562 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12563 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12564
12565 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12566
12567 SDValue Overflow =
12568 DAG.getNode(ISD::SRL, dl, VT, And,
12569 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12570
12571 SDValue OverflowTrunc =
12572 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12573
12574 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12575}
12576
12577/// Implements signed add with overflow detection using the rule:
12578/// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign bit.
12579SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12580
12581 SDLoc dl(Op);
12582 SDValue LHS = Op.getOperand(0);
12583 SDValue RHS = Op.getOperand(1);
12584 EVT VT = Op.getNode()->getValueType(0);
12585
12586 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
12587
12588 // Compute ~(x xor y)
12589 SDValue XorXY = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12590 SDValue EqvXY = DAG.getNOT(dl, XorXY, VT);
12591 // Compute (s xor x)
12592 SDValue SumXorX = DAG.getNode(ISD::XOR, dl, VT, Sum, LHS);
12593
12594 // overflow = (x eqv y) & (s xor x)
12595 SDValue OverflowInSign = DAG.getNode(ISD::AND, dl, VT, EqvXY, SumXorX);
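// Worked example for i8: x = 100, y = 50 gives Sum = -106. x and y share a
// sign, so ~(x ^ y) has its sign bit set; Sum ^ x also has its sign bit set,
// so the AND's sign bit is 1 and overflow is reported.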
12596
12597 // Shift sign bit down to LSB
12598 SDValue Overflow =
12599 DAG.getNode(ISD::SRL, dl, VT, OverflowInSign,
12600 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12601 // Truncate to the overflow type (i1)
12602 SDValue OverflowTrunc =
12603 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12604
12605 return DAG.getMergeValues({Sum, OverflowTrunc}, dl);
12606}
12607
12608// Lower unsigned 3-way compare producing -1/0/1.
12609SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12610 SDLoc DL(Op);
12611 SDValue A = DAG.getFreeze(Op.getOperand(0));
12612 SDValue B = DAG.getFreeze(Op.getOperand(1));
12613 EVT OpVT = A.getValueType(); // operand type
12614 EVT ResVT = Op.getValueType(); // result type
12615
12616 // First compute diff = A - B (will become subf).
12617 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12618
12619 // Generate B - A using SUBC to capture carry.
12620 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12621 SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12622 SDValue CA0 = SubC.getValue(1);
12623
12624 // t2 = A - B + CA0 using SUBE.
12625 SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12626 SDValue CA1 = SubE1.getValue(1);
12627
12628 // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12629 SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
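// With SUBE modelling subfe (X - Y - 1 + CA): for A > B, CA0 = 0 and CA1 = 1,
// giving 1; for A == B, CA0 = CA1 = 1, giving 0; for A < B, CA0 = 1 and
// CA1 = 0, giving -1.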
12630
12631 // Extract the first result and truncate to result type if needed
12632 return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12633}
12634
12635/// LowerOperation - Provide custom lowering hooks for some operations.
12636///
12637SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
12638 switch (Op.getOpcode()) {
12639 default:
12640 llvm_unreachable("Wasn't expecting to be able to lower this!");
12641 case ISD::FPOW: return lowerPow(Op, DAG);
12642 case ISD::FSIN: return lowerSin(Op, DAG);
12643 case ISD::FCOS: return lowerCos(Op, DAG);
12644 case ISD::FLOG: return lowerLog(Op, DAG);
12645 case ISD::FLOG10: return lowerLog10(Op, DAG);
12646 case ISD::FEXP: return lowerExp(Op, DAG);
12647 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12648 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12649 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12650 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12651 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12652 case ISD::STRICT_FSETCC:
12653 case ISD::STRICT_FSETCCS:
12654 case ISD::SETCC: return LowerSETCC(Op, DAG);
12655 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12656 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12657 case ISD::SSUBO:
12658 return LowerSSUBO(Op, DAG);
12659 case ISD::SADDO:
12660 return LowerSADDO(Op, DAG);
12661
12662 case ISD::INLINEASM:
12663 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12664 // Variable argument lowering.
12665 case ISD::VASTART: return LowerVASTART(Op, DAG);
12666 case ISD::VAARG: return LowerVAARG(Op, DAG);
12667 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12668
12669 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12670 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12671 case ISD::GET_DYNAMIC_AREA_OFFSET:
12672 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12673
12674 // Exception handling lowering.
12675 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12676 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12677 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12678
12679 case ISD::LOAD: return LowerLOAD(Op, DAG);
12680 case ISD::STORE: return LowerSTORE(Op, DAG);
12681 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12682 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12683 case ISD::STRICT_FP_TO_UINT:
12684 case ISD::STRICT_FP_TO_SINT:
12685 case ISD::FP_TO_UINT:
12686 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12687 case ISD::STRICT_UINT_TO_FP:
12688 case ISD::STRICT_SINT_TO_FP:
12689 case ISD::UINT_TO_FP:
12690 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12691 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12692 case ISD::SET_ROUNDING:
12693 return LowerSET_ROUNDING(Op, DAG);
12694
12695 // Lower 64-bit shifts.
12696 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12697 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12698 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12699
12700 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12701 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12702
12703 // Vector-related lowering.
12704 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12705 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12706 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12707 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12708 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12709 case ISD::MUL: return LowerMUL(Op, DAG);
12710 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12711 case ISD::STRICT_FP_ROUND:
12712 case ISD::FP_ROUND:
12713 return LowerFP_ROUND(Op, DAG);
12714 case ISD::ROTL: return LowerROTL(Op, DAG);
12715
12716 // For counter-based loop handling.
12717 case ISD::INTRINSIC_W_CHAIN:
12718 return SDValue();
12719
12720 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12721
12722 // Frame & Return address.
12723 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12724 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12725
12726 case ISD::INTRINSIC_VOID:
12727 return LowerINTRINSIC_VOID(Op, DAG);
12728 case ISD::BSWAP:
12729 return LowerBSWAP(Op, DAG);
12730 case ISD::ATOMIC_CMP_SWAP:
12731 return LowerATOMIC_CMP_SWAP(Op, DAG);
12732 case ISD::ATOMIC_STORE:
12733 return LowerATOMIC_LOAD_STORE(Op, DAG);
12734 case ISD::IS_FPCLASS:
12735 return LowerIS_FPCLASS(Op, DAG);
12736 case ISD::UADDO:
12737 case ISD::USUBO:
12738 return LowerADDSUBO(Op, DAG);
12739 case ISD::UADDO_CARRY:
12740 case ISD::USUBO_CARRY:
12741 return LowerADDSUBO_CARRY(Op, DAG);
12742 case ISD::UCMP:
12743 return LowerUCMP(Op, DAG);
12744 case ISD::STRICT_LRINT:
12745 case ISD::STRICT_LLRINT:
12746 case ISD::STRICT_LROUND:
12747 case ISD::STRICT_LLROUND:
12749 if (Op->getFlags().hasNoFPExcept())
12750 return Op;
12751 return SDValue();
12752 case ISD::VP_LOAD:
12753 return LowerVP_LOAD(Op, DAG);
12754 case ISD::VP_STORE:
12755 return LowerVP_STORE(Op, DAG);
12756 }
12757}
12758
12759void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12760 SmallVectorImpl<SDValue> &Results,
12761 SelectionDAG &DAG) const {
12762 SDLoc dl(N);
12763 switch (N->getOpcode()) {
12764 default:
12765 llvm_unreachable("Do not know how to custom type legalize this operation!");
12766 case ISD::ATOMIC_LOAD: {
12767 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12768 Results.push_back(Res);
12769 Results.push_back(Res.getValue(1));
12770 break;
12771 }
12772 case ISD::READCYCLECOUNTER: {
12773 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12774 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12775
12776 Results.push_back(
12777 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12778 Results.push_back(RTB.getValue(2));
12779 break;
12780 }
12781 case ISD::INTRINSIC_W_CHAIN: {
12782 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12783 break;
12784
12785 assert(N->getValueType(0) == MVT::i1 &&
12786 "Unexpected result type for CTR decrement intrinsic");
12787 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12788 N->getValueType(0));
12789 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12790 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12791 N->getOperand(1));
12792
12793 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12794 Results.push_back(NewInt.getValue(1));
12795 break;
12796 }
12797 case ISD::INTRINSIC_WO_CHAIN: {
12798 switch (N->getConstantOperandVal(0)) {
12799 case Intrinsic::ppc_pack_longdouble:
12800 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12801 N->getOperand(2), N->getOperand(1)));
12802 break;
12803 case Intrinsic::ppc_maxfe:
12804 case Intrinsic::ppc_minfe:
12805 case Intrinsic::ppc_fnmsub:
12806 case Intrinsic::ppc_convert_f128_to_ppcf128:
12807 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12808 break;
12809 }
12810 break;
12811 }
12812 case ISD::VAARG: {
12813 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12814 return;
12815
12816 EVT VT = N->getValueType(0);
12817
12818 if (VT == MVT::i64) {
12819 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12820
12821 Results.push_back(NewNode);
12822 Results.push_back(NewNode.getValue(1));
12823 }
12824 return;
12825 }
12826 case ISD::STRICT_FP_TO_SINT:
12827 case ISD::STRICT_FP_TO_UINT:
12828 case ISD::FP_TO_SINT:
12829 case ISD::FP_TO_UINT: {
12830 // LowerFP_TO_INT() can only handle f32 and f64.
12831 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12832 MVT::ppcf128)
12833 return;
12834 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12835 Results.push_back(LoweredValue);
12836 if (N->isStrictFPOpcode())
12837 Results.push_back(LoweredValue.getValue(1));
12838 return;
12839 }
12840 case ISD::TRUNCATE: {
12841 if (!N->getValueType(0).isVector())
12842 return;
12843 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12844 if (Lowered)
12845 Results.push_back(Lowered);
12846 return;
12847 }
12848 case ISD::SCALAR_TO_VECTOR: {
12849 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12850 if (Lowered)
12851 Results.push_back(Lowered);
12852 return;
12853 }
12854 case ISD::FSHL:
12855 case ISD::FSHR:
12856 // Don't handle funnel shifts here.
12857 return;
12858 case ISD::BITCAST:
12859 // Don't handle bitcast here.
12860 return;
12861 case ISD::FP_EXTEND:
12862 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12863 if (Lowered)
12864 Results.push_back(Lowered);
12865 return;
12866 }
12867}
12868
12869//===----------------------------------------------------------------------===//
12870// Other Lowering Code
12871//===----------------------------------------------------------------------===//
12872
12873static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12874 return Builder.CreateIntrinsic(Id, {});
12875}
12876
12877Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12878 Value *Addr,
12879 AtomicOrdering Ord) const {
12880 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12881
12882 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12883 "Only 8/16/32/64-bit atomic loads supported");
12884 Intrinsic::ID IntID;
12885 switch (SZ) {
12886 default:
12887 llvm_unreachable("Unexpected PrimitiveSize");
12888 case 8:
12889 IntID = Intrinsic::ppc_lbarx;
12890 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12891 break;
12892 case 16:
12893 IntID = Intrinsic::ppc_lharx;
12894 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12895 break;
12896 case 32:
12897 IntID = Intrinsic::ppc_lwarx;
12898 break;
12899 case 64:
12900 IntID = Intrinsic::ppc_ldarx;
12901 break;
12902 }
12903 Value *Call =
12904 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
12905
12906 return Builder.CreateTruncOrBitCast(Call, ValueTy);
12907}
12908
12909// Perform a store-conditional operation to Addr. Return the status of the
12910// store. This should be 0 if the store succeeded, non-zero otherwise.
12911Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12912 Value *Val, Value *Addr,
12913 AtomicOrdering Ord) const {
12914 Type *Ty = Val->getType();
12915 unsigned SZ = Ty->getPrimitiveSizeInBits();
12916
12917 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12918 "Only 8/16/32/64-bit atomic loads supported");
12919 Intrinsic::ID IntID;
12920 switch (SZ) {
12921 default:
12922 llvm_unreachable("Unexpected PrimitiveSize");
12923 case 8:
12924 IntID = Intrinsic::ppc_stbcx;
12925 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12926 break;
12927 case 16:
12928 IntID = Intrinsic::ppc_sthcx;
12929 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12930 break;
12931 case 32:
12932 IntID = Intrinsic::ppc_stwcx;
12933 break;
12934 case 64:
12935 IntID = Intrinsic::ppc_stdcx;
12936 break;
12937 }
12938
12939 if (SZ == 8 || SZ == 16)
12940 Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
12941
12942 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
12943 /*FMFSource=*/nullptr, "stcx");
12944 return Builder.CreateXor(Call, Builder.getInt32(1));
12945}
12946
12947// The mappings for emitLeading/TrailingFence is taken from
12948// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12949Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12950 Instruction *Inst,
12951 AtomicOrdering Ord) const {
12952 if (Ord == AtomicOrdering::SequentiallyConsistent)
12953 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12954 if (isReleaseOrStronger(Ord))
12955 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12956 return nullptr;
12957}
12958
12959Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12960 Instruction *Inst,
12961 AtomicOrdering Ord) const {
12962 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12963 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12964 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12965 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12966 if (isa<LoadInst>(Inst))
12967 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
12968 {Inst});
12969 // FIXME: Can use isync for rmw operation.
12970 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12971 }
12972 return nullptr;
12973}
12974
12975MachineBasicBlock *
12976PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12977 unsigned AtomicSize,
12978 unsigned BinOpcode,
12979 unsigned CmpOpcode,
12980 unsigned CmpPred) const {
12981 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12982 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12983
12984 auto LoadMnemonic = PPC::LDARX;
12985 auto StoreMnemonic = PPC::STDCX;
12986 switch (AtomicSize) {
12987 default:
12988 llvm_unreachable("Unexpected size of atomic entity");
12989 case 1:
12990 LoadMnemonic = PPC::LBARX;
12991 StoreMnemonic = PPC::STBCX;
12992 assert(Subtarget.hasPartwordAtomics() &&
12992 "Sizes below 4 bytes require partword atomic support");
12993 break;
12994 case 2:
12995 LoadMnemonic = PPC::LHARX;
12996 StoreMnemonic = PPC::STHCX;
12997 assert(Subtarget.hasPartwordAtomics() &&
12997 "Sizes below 4 bytes require partword atomic support");
12998 break;
12999 case 4:
13000 LoadMnemonic = PPC::LWARX;
13001 StoreMnemonic = PPC::STWCX;
13002 break;
13003 case 8:
13004 LoadMnemonic = PPC::LDARX;
13005 StoreMnemonic = PPC::STDCX;
13006 break;
13007 }
13008
13009 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13010 MachineFunction *F = BB->getParent();
13011 MachineFunction::iterator It = ++BB->getIterator();
13012
13013 Register dest = MI.getOperand(0).getReg();
13014 Register ptrA = MI.getOperand(1).getReg();
13015 Register ptrB = MI.getOperand(2).getReg();
13016 Register incr = MI.getOperand(3).getReg();
13017 DebugLoc dl = MI.getDebugLoc();
13018
13019 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13020 MachineBasicBlock *loop2MBB =
13021 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13022 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13023 F->insert(It, loopMBB);
13024 if (CmpOpcode)
13025 F->insert(It, loop2MBB);
13026 F->insert(It, exitMBB);
13027 exitMBB->splice(exitMBB->begin(), BB,
13028 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13030
13031 MachineRegisterInfo &RegInfo = F->getRegInfo();
13032 Register TmpReg = (!BinOpcode) ? incr :
13033 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
13034 : &PPC::GPRCRegClass);
13035
13036 // thisMBB:
13037 // ...
13038 // fallthrough --> loopMBB
13039 BB->addSuccessor(loopMBB);
13040
13041 // loopMBB:
13042 // l[wd]arx dest, ptr
13043 // add r0, dest, incr
13044 // st[wd]cx. r0, ptr
13045 // bne- loopMBB
13046 // fallthrough --> exitMBB
13047
13048 // For max/min...
13049 // loopMBB:
13050 // l[wd]arx dest, ptr
13051 // cmpl?[wd] dest, incr
13052 // bgt exitMBB
13053 // loop2MBB:
13054 // st[wd]cx. dest, ptr
13055 // bne- loopMBB
13056 // fallthrough --> exitMBB
13057
13058 BB = loopMBB;
13059 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
13060 .addReg(ptrA).addReg(ptrB);
13061 if (BinOpcode)
13062 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
13063 if (CmpOpcode) {
13064 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13065 // Signed comparisons of byte or halfword values must be sign-extended.
13066 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13067 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13068 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13069 ExtReg).addReg(dest);
13070 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
13071 } else
13072 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
13073
13074 BuildMI(BB, dl, TII->get(PPC::BCC))
13075 .addImm(CmpPred)
13076 .addReg(CrReg)
13077 .addMBB(exitMBB);
13078 BB->addSuccessor(loop2MBB);
13079 BB->addSuccessor(exitMBB);
13080 BB = loop2MBB;
13081 }
13082 BuildMI(BB, dl, TII->get(StoreMnemonic))
13083 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
13084 BuildMI(BB, dl, TII->get(PPC::BCC))
13085 .addImm(PPC::PRED_NE)
13086 .addReg(PPC::CR0)
13087 .addMBB(loopMBB);
13088 BB->addSuccessor(loopMBB);
13089 BB->addSuccessor(exitMBB);
13090
13091 // exitMBB:
13092 // ...
13093 BB = exitMBB;
13094 return BB;
13095}
13096
13097static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
13098 switch(MI.getOpcode()) {
13099 default:
13100 return false;
13101 case PPC::COPY:
13102 return TII->isSignExtended(MI.getOperand(1).getReg(),
13103 &MI.getMF()->getRegInfo());
13104 case PPC::LHA:
13105 case PPC::LHA8:
13106 case PPC::LHAU:
13107 case PPC::LHAU8:
13108 case PPC::LHAUX:
13109 case PPC::LHAUX8:
13110 case PPC::LHAX:
13111 case PPC::LHAX8:
13112 case PPC::LWA:
13113 case PPC::LWAUX:
13114 case PPC::LWAX:
13115 case PPC::LWAX_32:
13116 case PPC::LWA_32:
13117 case PPC::PLHA:
13118 case PPC::PLHA8:
13119 case PPC::PLHA8pc:
13120 case PPC::PLHApc:
13121 case PPC::PLWA:
13122 case PPC::PLWA8:
13123 case PPC::PLWA8pc:
13124 case PPC::PLWApc:
13125 case PPC::EXTSB:
13126 case PPC::EXTSB8:
13127 case PPC::EXTSB8_32_64:
13128 case PPC::EXTSB8_rec:
13129 case PPC::EXTSB_rec:
13130 case PPC::EXTSH:
13131 case PPC::EXTSH8:
13132 case PPC::EXTSH8_32_64:
13133 case PPC::EXTSH8_rec:
13134 case PPC::EXTSH_rec:
13135 case PPC::EXTSW:
13136 case PPC::EXTSWSLI:
13137 case PPC::EXTSWSLI_32_64:
13138 case PPC::EXTSWSLI_32_64_rec:
13139 case PPC::EXTSWSLI_rec:
13140 case PPC::EXTSW_32:
13141 case PPC::EXTSW_32_64:
13142 case PPC::EXTSW_32_64_rec:
13143 case PPC::EXTSW_rec:
13144 case PPC::SRAW:
13145 case PPC::SRAWI:
13146 case PPC::SRAWI_rec:
13147 case PPC::SRAW_rec:
13148 return true;
13149 }
13150 return false;
13151}
13152
13153MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
13154 MachineInstr &MI, MachineBasicBlock *BB,
13155 bool is8bit, // true for an 8-bit, false for a 16-bit operation
13156 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
13157 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13158 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13159
13160 // If this is a signed comparison and the value being compared is not known
13161 // to be sign extended, sign extend it here.
13162 DebugLoc dl = MI.getDebugLoc();
13163 MachineFunction *F = BB->getParent();
13164 MachineRegisterInfo &RegInfo = F->getRegInfo();
13165 Register incr = MI.getOperand(3).getReg();
13166 bool IsSignExtended =
13167 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
13168
13169 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
13170 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13171 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13172 .addReg(MI.getOperand(3).getReg());
13173 MI.getOperand(3).setReg(ValueReg);
13174 incr = ValueReg;
13175 }
13176 // If we support part-word atomic mnemonics, just use them
13177 if (Subtarget.hasPartwordAtomics())
13178 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
13179 CmpPred);
13180
13181 // In 64 bit mode we have to use 64 bits for addresses, even though the
13182 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
13183 // registers without caring whether they're 32 or 64, but here we're
13184 // doing actual arithmetic on the addresses.
13185 bool is64bit = Subtarget.isPPC64();
13186 bool isLittleEndian = Subtarget.isLittleEndian();
13187 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13188
13189 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13190 MachineFunction::iterator It = ++BB->getIterator();
13191
13192 Register dest = MI.getOperand(0).getReg();
13193 Register ptrA = MI.getOperand(1).getReg();
13194 Register ptrB = MI.getOperand(2).getReg();
13195
13196 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13197 MachineBasicBlock *loop2MBB =
13198 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13199 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13200 F->insert(It, loopMBB);
13201 if (CmpOpcode)
13202 F->insert(It, loop2MBB);
13203 F->insert(It, exitMBB);
13204 exitMBB->splice(exitMBB->begin(), BB,
13205 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13207
13208 const TargetRegisterClass *RC =
13209 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13210 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13211
13212 Register PtrReg = RegInfo.createVirtualRegister(RC);
13213 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13214 Register ShiftReg =
13215 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13216 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13217 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13218 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13219 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13220 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13221 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13222 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13223 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13224 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13225 Register Ptr1Reg;
13226 Register TmpReg =
13227 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13228
13229 // thisMBB:
13230 // ...
13231 // fallthrough --> loopMBB
13232 BB->addSuccessor(loopMBB);
13233
13234 // The 4-byte load must be aligned, while a char or short may be
13235 // anywhere in the word. Hence all this nasty bookkeeping code.
13236 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13237 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13238 // xori shift, shift1, 24 [16]
13239 // rlwinm ptr, ptr1, 0, 0, 29
13240 // slw incr2, incr, shift
13241 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13242 // slw mask, mask2, shift
13243 // loopMBB:
13244 // lwarx tmpDest, ptr
13245 // add tmp, tmpDest, incr2
13246 // andc tmp2, tmpDest, mask
13247 // and tmp3, tmp, mask
13248 // or tmp4, tmp3, tmp2
13249 // stwcx. tmp4, ptr
13250 // bne- loopMBB
13251 // fallthrough --> exitMBB
13252 // srw SrwDest, tmpDest, shift
13253 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
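// E.g. a byte at word offset 1: shift1 = 8. On little-endian the byte occupies
// bits 8-15 of the loaded word, so shift = 8; on big-endian it occupies bits
// 16-23, and the xori with 24 maps 8 to 16 (24 - 8*offset == (8*offset) ^ 24
// for offsets 0..3).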
13254 if (ptrA != ZeroReg) {
13255 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13256 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13257 .addReg(ptrA)
13258 .addReg(ptrB);
13259 } else {
13260 Ptr1Reg = ptrB;
13261 }
13262 // We need to use the 32-bit subregister here to avoid a register class
13263 // mismatch in 64-bit mode.
13264 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13265 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
13266 .addImm(3)
13267 .addImm(27)
13268 .addImm(is8bit ? 28 : 27);
13269 if (!isLittleEndian)
13270 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13271 .addReg(Shift1Reg)
13272 .addImm(is8bit ? 24 : 16);
13273 if (is64bit)
13274 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13275 .addReg(Ptr1Reg)
13276 .addImm(0)
13277 .addImm(61);
13278 else
13279 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13280 .addReg(Ptr1Reg)
13281 .addImm(0)
13282 .addImm(0)
13283 .addImm(29);
13284 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13285 if (is8bit)
13286 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13287 else {
13288 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13289 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13290 .addReg(Mask3Reg)
13291 .addImm(65535);
13292 }
13293 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13294 .addReg(Mask2Reg)
13295 .addReg(ShiftReg);
13296
13297 BB = loopMBB;
13298 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13299 .addReg(ZeroReg)
13300 .addReg(PtrReg);
13301 if (BinOpcode)
13302 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13303 .addReg(Incr2Reg)
13304 .addReg(TmpDestReg);
13305 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13306 .addReg(TmpDestReg)
13307 .addReg(MaskReg);
13308 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13309 if (CmpOpcode) {
13310 // For unsigned comparisons, we can directly compare the shifted values.
13311 // For signed comparisons we shift and sign extend.
13312 Register SReg = RegInfo.createVirtualRegister(GPRC);
13313 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13314 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13315 .addReg(TmpDestReg)
13316 .addReg(MaskReg);
13317 unsigned ValueReg = SReg;
13318 unsigned CmpReg = Incr2Reg;
13319 if (CmpOpcode == PPC::CMPW) {
13320 ValueReg = RegInfo.createVirtualRegister(GPRC);
13321 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13322 .addReg(SReg)
13323 .addReg(ShiftReg);
13324 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13325 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13326 .addReg(ValueReg);
13327 ValueReg = ValueSReg;
13328 CmpReg = incr;
13329 }
13330 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13331 BuildMI(BB, dl, TII->get(PPC::BCC))
13332 .addImm(CmpPred)
13333 .addReg(CrReg)
13334 .addMBB(exitMBB);
13335 BB->addSuccessor(loop2MBB);
13336 BB->addSuccessor(exitMBB);
13337 BB = loop2MBB;
13338 }
13339 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13340 BuildMI(BB, dl, TII->get(PPC::STWCX))
13341 .addReg(Tmp4Reg)
13342 .addReg(ZeroReg)
13343 .addReg(PtrReg);
13344 BuildMI(BB, dl, TII->get(PPC::BCC))
13345 .addImm(PPC::PRED_NE)
13346 .addReg(PPC::CR0)
13347 .addMBB(loopMBB);
13348 BB->addSuccessor(loopMBB);
13349 BB->addSuccessor(exitMBB);
13350
13351 // exitMBB:
13352 // ...
13353 BB = exitMBB;
13354 // Since the shift amount is not a constant, we need to clear
13355 // the upper bits with a separate RLWINM.
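// Both instructions below are inserted at exitMBB->begin(), so the SRW (built
// second) ends up ahead of the RLWINM in the final block order.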
13356 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13357 .addReg(SrwDestReg)
13358 .addImm(0)
13359 .addImm(is8bit ? 24 : 16)
13360 .addImm(31);
13361 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13362 .addReg(TmpDestReg)
13363 .addReg(ShiftReg);
13364 return BB;
13365}
13366
13367MachineBasicBlock *
13368PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
13369 MachineBasicBlock *MBB) const {
13370 DebugLoc DL = MI.getDebugLoc();
13371 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13372 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13373
13374 MachineFunction *MF = MBB->getParent();
13375 MachineRegisterInfo &MRI = MF->getRegInfo();
13376
13377 const BasicBlock *BB = MBB->getBasicBlock();
13378 MachineFunction::iterator I = ++MBB->getIterator();
13379
13380 Register DstReg = MI.getOperand(0).getReg();
13381 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13382 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13383 Register mainDstReg = MRI.createVirtualRegister(RC);
13384 Register restoreDstReg = MRI.createVirtualRegister(RC);
13385
13386 MVT PVT = getPointerTy(MF->getDataLayout());
13387 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13388 "Invalid Pointer Size!");
13389 // For v = setjmp(buf), we generate
13390 //
13391 // thisMBB:
13392 // SjLjSetup mainMBB
13393 // bl mainMBB
13394 // v_restore = 1
13395 // b sinkMBB
13396 //
13397 // mainMBB:
13398 // buf[LabelOffset] = LR
13399 // v_main = 0
13400 //
13401 // sinkMBB:
13402 // v = phi(main, restore)
13403 //
13404
13405 MachineBasicBlock *thisMBB = MBB;
13406 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13407 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13408 MF->insert(I, mainMBB);
13409 MF->insert(I, sinkMBB);
13410
13411 MachineInstrBuilder MIB;
13412
13413 // Transfer the remainder of BB and its successor edges to sinkMBB.
13414 sinkMBB->splice(sinkMBB->begin(), MBB,
13415 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13417
13418 // Note that the structure of the jmp_buf used here is not compatible
13419 // with that used by libc, and is not designed to be. Specifically, it
13420 // stores only those 'reserved' registers that LLVM does not otherwise
13421 // understand how to spill. Also, by convention, by the time this
13422 // intrinsic is called, Clang has already stored the frame address in the
13423 // first slot of the buffer and stack address in the third. Following the
13424 // X86 target code, we'll store the jump address in the second slot. We also
13425 // need to save the TOC pointer (R2) to handle jumps between shared
13426 // libraries, and that will be stored in the fourth slot. The thread
13427 // identifier (R13) is not affected.
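// Resulting slot layout, in pointer-size units: [0] frame address (stored by
// Clang), [1] jump IP, [2] stack pointer, [3] TOC (R2), [4] BP.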
13428
13429 // thisMBB:
13430 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13431 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13432 const int64_t BPOffset = 4 * PVT.getStoreSize();
13433
13434 // Prepare the IP in a register.
13435 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13436 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13437 Register BufReg = MI.getOperand(1).getReg();
13438
13439 if (Subtarget.is64BitELFABI()) {
13440 setUsesTOCBasePtr(*MBB->getParent());
13441 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13442 .addReg(PPC::X2)
13443 .addImm(TOCOffset)
13444 .addReg(BufReg)
13445 .cloneMemRefs(MI);
13446 }
13447
13448 // Naked functions never have a base pointer, and so we use r1. For all
13449 // other functions, this decision must be deferred until PEI.
13450 unsigned BaseReg;
13451 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13452 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13453 else
13454 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13455
13456 MIB = BuildMI(*thisMBB, MI, DL,
13457 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13458 .addReg(BaseReg)
13459 .addImm(BPOffset)
13460 .addReg(BufReg)
13461 .cloneMemRefs(MI);
13462
13463 // Setup
13464 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13465 MIB.addRegMask(TRI->getNoPreservedMask());
13466
13467 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13468
13469 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13470 .addMBB(mainMBB);
13471 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13472
13473 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13474 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13475
13476 // mainMBB:
13477 // mainDstReg = 0
13478 MIB =
13479 BuildMI(mainMBB, DL,
13480 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13481
13482 // Store IP
13483 if (Subtarget.isPPC64()) {
13484 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13485 .addReg(LabelReg)
13486 .addImm(LabelOffset)
13487 .addReg(BufReg);
13488 } else {
13489 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13490 .addReg(LabelReg)
13491 .addImm(LabelOffset)
13492 .addReg(BufReg);
13493 }
13494 MIB.cloneMemRefs(MI);
13495
13496 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13497 mainMBB->addSuccessor(sinkMBB);
13498
13499 // sinkMBB:
13500 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13501 TII->get(PPC::PHI), DstReg)
13502 .addReg(mainDstReg).addMBB(mainMBB)
13503 .addReg(restoreDstReg).addMBB(thisMBB);
13504
13505 MI.eraseFromParent();
13506 return sinkMBB;
13507}
13508
13509MachineBasicBlock *
13510PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
13511 MachineBasicBlock *MBB) const {
13512 DebugLoc DL = MI.getDebugLoc();
13513 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13514
13515 MachineFunction *MF = MBB->getParent();
13516 MachineRegisterInfo &MRI = MF->getRegInfo();
13517
13518 MVT PVT = getPointerTy(MF->getDataLayout());
13519 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13520 "Invalid Pointer Size!");
13521
13522 const TargetRegisterClass *RC =
13523 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13524 Register Tmp = MRI.createVirtualRegister(RC);
13525 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13526 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13527 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13528 unsigned BP =
13529 (PVT == MVT::i64)
13530 ? PPC::X30
13531 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13532 : PPC::R30);
13533
13535
13536 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13537 const int64_t SPOffset = 2 * PVT.getStoreSize();
13538 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13539 const int64_t BPOffset = 4 * PVT.getStoreSize();
13540
13541 Register BufReg = MI.getOperand(0).getReg();
13542
13543 // Reload FP (the jumped-to function may not have had a
13544 // frame pointer, and if so, then its r31 will be restored
13545 // as necessary).
13546 if (PVT == MVT::i64) {
13547 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13548 .addImm(0)
13549 .addReg(BufReg);
13550 } else {
13551 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13552 .addImm(0)
13553 .addReg(BufReg);
13554 }
13555 MIB.cloneMemRefs(MI);
13556
13557 // Reload IP
13558 if (PVT == MVT::i64) {
13559 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13560 .addImm(LabelOffset)
13561 .addReg(BufReg);
13562 } else {
13563 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13564 .addImm(LabelOffset)
13565 .addReg(BufReg);
13566 }
13567 MIB.cloneMemRefs(MI);
13568
13569 // Reload SP
13570 if (PVT == MVT::i64) {
13571 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13572 .addImm(SPOffset)
13573 .addReg(BufReg);
13574 } else {
13575 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13576 .addImm(SPOffset)
13577 .addReg(BufReg);
13578 }
13579 MIB.cloneMemRefs(MI);
13580
13581 // Reload BP
13582 if (PVT == MVT::i64) {
13583 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13584 .addImm(BPOffset)
13585 .addReg(BufReg);
13586 } else {
13587 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13588 .addImm(BPOffset)
13589 .addReg(BufReg);
13590 }
13591 MIB.cloneMemRefs(MI);
13592
13593 // Reload TOC
13594 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13595 setUsesTOCBasePtr(*MBB->getParent());
13596 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13597 .addImm(TOCOffset)
13598 .addReg(BufReg)
13599 .cloneMemRefs(MI);
13600 }
13601
13602 // Jump
13603 BuildMI(*MBB, MI, DL,
13604 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13605 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13606
13607 MI.eraseFromParent();
13608 return MBB;
13609}
13610
13611bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13612 // If the function specifically requests inline stack probes, emit them.
13613 if (MF.getFunction().hasFnAttribute("probe-stack"))
13614 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13615 "inline-asm";
13616 return false;
13617}
13618
13619unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13620 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13621 unsigned StackAlign = TFI->getStackAlignment();
13622 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13623 "Unexpected stack alignment");
13624 // The default stack probe size is 4096 if the function has no
13625 // stack-probe-size attribute.
13626 const Function &Fn = MF.getFunction();
13627 unsigned StackProbeSize =
13628 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13629 // Round down to the stack alignment.
13630 StackProbeSize &= ~(StackAlign - 1);
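// E.g. with a 16-byte stack alignment, a "stack-probe-size" of 4100 rounds
// down to 4096; if rounding yields 0, fall back to the alignment below.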
13631 return StackProbeSize ? StackProbeSize : StackAlign;
13632}
13633
13634// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
13635// into three phases. In the first phase, it uses the pseudo instruction
13636// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
13637// FinalStackPtr. In the second phase, it generates a loop that probes blocks.
13638// Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
13639// of MaxCallFrameSize so that it can calculate the correct data area pointer.
13640MachineBasicBlock *
13641PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13642 MachineBasicBlock *MBB) const {
13643 const bool isPPC64 = Subtarget.isPPC64();
13644 MachineFunction *MF = MBB->getParent();
13645 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13646 DebugLoc DL = MI.getDebugLoc();
13647 const unsigned ProbeSize = getStackProbeSize(*MF);
13648 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13649 MachineRegisterInfo &MRI = MF->getRegInfo();
13650 // The CFG of the stack-probing code looks like:
13651 // +-----+
13652 // | MBB |
13653 // +--+--+
13654 // |
13655 // +----v----+
13656 // +--->+ TestMBB +---+
13657 // | +----+----+ |
13658 // | | |
13659 // | +-----v----+ |
13660 // +---+ BlockMBB | |
13661 // +----------+ |
13662 // |
13663 // +---------+ |
13664 // | TailMBB +<--+
13665 // +---------+
13666 // In MBB, calculate previous frame pointer and final stack pointer.
13667 // In TestMBB, test whether sp equals the final stack pointer and, if so, jump to
13668 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13669 // TailMBB is spliced via \p MI.
13670 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13671 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13672 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13673
13674 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13675 MF->insert(MBBIter, TestMBB);
13676 MF->insert(MBBIter, BlockMBB);
13677 MF->insert(MBBIter, TailMBB);
13678
13679 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13680 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13681
13682 Register DstReg = MI.getOperand(0).getReg();
13683 Register NegSizeReg = MI.getOperand(1).getReg();
13684 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13685 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13686 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13687 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13688
13689 // Since the value of NegSizeReg might be realigned during prologue/epilogue
13690 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13691 // actual FramePointer and NegSize.
13692 unsigned ProbeOpc;
13693 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13694 ProbeOpc =
13695 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13696 else
13697 // When NegSizeReg has only one use (this MI, which will be replaced by
13698 // PREPARE_PROBED_ALLOCA), use the NEGSIZE_SAME_REG variant so that
13699 // ActualNegSizeReg and NegSizeReg are allocated to the same physical
13700 // register, avoiding a redundant copy.
13701 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13702 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13703 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13704 .addDef(ActualNegSizeReg)
13705 .addReg(NegSizeReg)
13706 .add(MI.getOperand(2))
13707 .add(MI.getOperand(3));
13708
13709 // Calculate the final stack pointer, which equals SP + ActualNegSize.
13710 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13711 FinalStackPtr)
13712 .addReg(SPReg)
13713 .addReg(ActualNegSizeReg);
13714
13715 // Materialize a scratch register for update.
13716 int64_t NegProbeSize = -(int64_t)ProbeSize;
13717 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13718 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13719 if (!isInt<16>(NegProbeSize)) {
13720 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13721 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13722 .addImm(NegProbeSize >> 16);
13723 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13724 ScratchReg)
13725 .addReg(TempReg)
13726 .addImm(NegProbeSize & 0xFFFF);
13727 } else
13728 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13729 .addImm(NegProbeSize);
13730
13731 {
13732 // Probing leading residual part.
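// Div = ActualNegSize / -ProbeSize, Mul = Div * -ProbeSize, and
// NegMod = ActualNegSize - Mul is the residual (|NegMod| < ProbeSize); it is
// probed first with a single stdux/stwux before entering the loop.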
13733 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13734 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13735 .addReg(ActualNegSizeReg)
13736 .addReg(ScratchReg);
13737 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13738 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13739 .addReg(Div)
13740 .addReg(ScratchReg);
13741 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13742 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13743 .addReg(Mul)
13744 .addReg(ActualNegSizeReg);
13745 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13746 .addReg(FramePointer)
13747 .addReg(SPReg)
13748 .addReg(NegMod);
13749 }
13750
13751 {
13752 // Remaining part should be multiple of ProbeSize.
13753 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13754 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13755 .addReg(SPReg)
13756 .addReg(FinalStackPtr);
13757 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13758 .addImm(PPC::PRED_EQ)
13759 .addReg(CmpResult)
13760 .addMBB(TailMBB);
13761 TestMBB->addSuccessor(BlockMBB);
13762 TestMBB->addSuccessor(TailMBB);
13763 }
13764
13765 {
13766 // Touch the block.
13767 // |P...|P...|P...
13768 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13769 .addReg(FramePointer)
13770 .addReg(SPReg)
13771 .addReg(ScratchReg);
13772 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13773 BlockMBB->addSuccessor(TestMBB);
13774 }
13775
13776 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion;
13777 // use the DYNAREAOFFSET pseudo instruction to get the future result.
13778 Register MaxCallFrameSizeReg =
13779 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13780 BuildMI(TailMBB, DL,
13781 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13782 MaxCallFrameSizeReg)
13783 .add(MI.getOperand(2))
13784 .add(MI.getOperand(3));
13785 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13786 .addReg(SPReg)
13787 .addReg(MaxCallFrameSizeReg);
13788
13789 // Splice instructions after MI to TailMBB.
13790 TailMBB->splice(TailMBB->end(), MBB,
13791 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13793 MBB->addSuccessor(TestMBB);
13794
13795 // Delete the pseudo instruction.
13796 MI.eraseFromParent();
13797
13798 ++NumDynamicAllocaProbed;
13799 return TailMBB;
13800}
13801
13802static bool IsSelectCC(MachineInstr &MI) {
13803 switch (MI.getOpcode()) {
13804 case PPC::SELECT_CC_I4:
13805 case PPC::SELECT_CC_I8:
13806 case PPC::SELECT_CC_F4:
13807 case PPC::SELECT_CC_F8:
13808 case PPC::SELECT_CC_F16:
13809 case PPC::SELECT_CC_VRRC:
13810 case PPC::SELECT_CC_VSFRC:
13811 case PPC::SELECT_CC_VSSRC:
13812 case PPC::SELECT_CC_VSRC:
13813 case PPC::SELECT_CC_SPE4:
13814 case PPC::SELECT_CC_SPE:
13815 return true;
13816 default:
13817 return false;
13818 }
13819}
13820
13821static bool IsSelect(MachineInstr &MI) {
13822 switch (MI.getOpcode()) {
13823 case PPC::SELECT_I4:
13824 case PPC::SELECT_I8:
13825 case PPC::SELECT_F4:
13826 case PPC::SELECT_F8:
13827 case PPC::SELECT_F16:
13828 case PPC::SELECT_SPE:
13829 case PPC::SELECT_SPE4:
13830 case PPC::SELECT_VRRC:
13831 case PPC::SELECT_VSFRC:
13832 case PPC::SELECT_VSSRC:
13833 case PPC::SELECT_VSRC:
13834 return true;
13835 default:
13836 return false;
13837 }
13838}
13839
13840MachineBasicBlock *
13841PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13842 MachineBasicBlock *BB) const {
13843 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13844 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13845 if (Subtarget.is64BitELFABI() &&
13846 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13847 !Subtarget.isUsingPCRelativeCalls()) {
13848 // Call lowering should have added an r2 operand to indicate a dependence
13849 // on the TOC base pointer value. It can't, however, because there is no
13850 // way to mark the dependence as implicit there, and so the stackmap code
13851 // will confuse it with a regular operand. Instead, add the dependence
13852 // here.
13853 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13854 }
13855
13856 return emitPatchPoint(MI, BB);
13857 }
13858
13859 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13860 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13861 return emitEHSjLjSetJmp(MI, BB);
13862 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13863 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13864 return emitEHSjLjLongJmp(MI, BB);
13865 }
13866
13867 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13868
13869 // To "insert" these instructions we actually have to insert their
13870 // control-flow patterns.
13871 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13872 MachineFunction::iterator It = ++BB->getIterator();
13873
13874 MachineFunction *F = BB->getParent();
13875 MachineRegisterInfo &MRI = F->getRegInfo();
13876
13877 if (Subtarget.hasISEL() &&
13878 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13879 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13880 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13881 SmallVector<MachineOperand, 2> Cond;
13882 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13883 MI.getOpcode() == PPC::SELECT_CC_I8)
13884 Cond.push_back(MI.getOperand(4));
13885 else
13886 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13887 Cond.push_back(MI.getOperand(1));
13888
13889 DebugLoc dl = MI.getDebugLoc();
13890 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13891 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13892 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13893 // The incoming instruction knows the destination vreg to set, the
13894 // condition code register to branch on, the true/false values to
13895 // select between, and a branch opcode to use.
13896
13897 // thisMBB:
13898 // ...
13899 // TrueVal = ...
13900 // cmpTY ccX, r1, r2
13901 // bCC sinkMBB
13902 // fallthrough --> copy0MBB
13903 MachineBasicBlock *thisMBB = BB;
13904 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13905 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13906 DebugLoc dl = MI.getDebugLoc();
13907 F->insert(It, copy0MBB);
13908 F->insert(It, sinkMBB);
13909
13910 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
13911 copy0MBB->addLiveIn(PPC::CARRY);
13912 sinkMBB->addLiveIn(PPC::CARRY);
13913 }
13914
13915 // Set the call frame size on entry to the new basic blocks.
13916 // See https://reviews.llvm.org/D156113.
13917 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13918 copy0MBB->setCallFrameSize(CallFrameSize);
13919 sinkMBB->setCallFrameSize(CallFrameSize);
13920
13921 // Transfer the remainder of BB and its successor edges to sinkMBB.
13922 sinkMBB->splice(sinkMBB->begin(), BB,
13923 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13925
13926 // Next, add the true and fallthrough blocks as its successors.
13927 BB->addSuccessor(copy0MBB);
13928 BB->addSuccessor(sinkMBB);
13929
13930 if (IsSelect(MI)) {
13931 BuildMI(BB, dl, TII->get(PPC::BC))
13932 .addReg(MI.getOperand(1).getReg())
13933 .addMBB(sinkMBB);
13934 } else {
13935 unsigned SelectPred = MI.getOperand(4).getImm();
13936 BuildMI(BB, dl, TII->get(PPC::BCC))
13937 .addImm(SelectPred)
13938 .addReg(MI.getOperand(1).getReg())
13939 .addMBB(sinkMBB);
13940 }
13941
13942 // copy0MBB:
13943 // %FalseValue = ...
13944 // # fallthrough to sinkMBB
13945 BB = copy0MBB;
13946
13947 // Update machine-CFG edges
13948 BB->addSuccessor(sinkMBB);
13949
13950 // sinkMBB:
13951 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13952 // ...
13953 BB = sinkMBB;
13954 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13955 .addReg(MI.getOperand(3).getReg())
13956 .addMBB(copy0MBB)
13957 .addReg(MI.getOperand(2).getReg())
13958 .addMBB(thisMBB);
13959 } else if (MI.getOpcode() == PPC::ReadTB) {
13960 // To read the 64-bit time-base register on a 32-bit target, we read the
13961 // two halves. Should the counter have wrapped while it was being read, we
13962 // need to try again.
13963 // ...
13964 // readLoop:
13965 // mfspr Rx,TBU # load from TBU
13966 // mfspr Ry,TB # load from TB
13967 // mfspr Rz,TBU # load from TBU
13968 // cmpw crX,Rx,Rz # check if 'old'='new'
13969 // bne readLoop # branch if they're not equal
13970 // ...
13971
13972 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13973 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13974 DebugLoc dl = MI.getDebugLoc();
13975 F->insert(It, readMBB);
13976 F->insert(It, sinkMBB);
13977
13978 // Transfer the remainder of BB and its successor edges to sinkMBB.
13979 sinkMBB->splice(sinkMBB->begin(), BB,
13980 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13982
13983 BB->addSuccessor(readMBB);
13984 BB = readMBB;
13985
13986 MachineRegisterInfo &RegInfo = F->getRegInfo();
13987 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13988 Register LoReg = MI.getOperand(0).getReg();
13989 Register HiReg = MI.getOperand(1).getReg();
13990
13991 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13992 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13993 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13994
13995 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13996
13997 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13998 .addReg(HiReg)
13999 .addReg(ReadAgainReg);
14000 BuildMI(BB, dl, TII->get(PPC::BCC))
14001 .addImm(PPC::PRED_NE)
14002 .addReg(CmpReg)
14003 .addMBB(readMBB);
14004
14005 BB->addSuccessor(readMBB);
14006 BB->addSuccessor(sinkMBB);
14007 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
14008 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
14009 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
14010 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
14011 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
14012 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
14013 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
14014 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
14015
14016 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
14017 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
14018 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
14019 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
14020 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
14021 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
14022 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
14023 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
14024
14025 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
14026 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
14027 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
14028 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
14029 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
14030 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
14031 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
14032 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
14033
14034 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
14035 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
14036 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
14037 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
14038 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
14039 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
14040 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
14041 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
14042
14043 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
14044 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
14045 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
14046 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
14047 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
14048 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
14049 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
14050 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
14051
14052 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
14053 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
14054 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
14055 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
14056 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14057 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
14058 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14059 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
14060
14061 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14062 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
14063 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14064 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
14065 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14066 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
14067 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14068 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
14069
14070 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14071 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
14072 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14073 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
14074 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14075 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
14076 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14077 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
14078
14079 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14080 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
14081 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14082 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
14083 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14084 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
14085 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14086 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
14087
14088 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14089 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
14090 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14091 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
14092 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14093 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
14094 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14095 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
14096
14097 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14098 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
14099 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14100 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
14101 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14102 BB = EmitAtomicBinary(MI, BB, 4, 0);
14103 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14104 BB = EmitAtomicBinary(MI, BB, 8, 0);
14105 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14106 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14107 (Subtarget.hasPartwordAtomics() &&
14108 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14109 (Subtarget.hasPartwordAtomics() &&
14110 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14111 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14112
14113 auto LoadMnemonic = PPC::LDARX;
14114 auto StoreMnemonic = PPC::STDCX;
14115 switch (MI.getOpcode()) {
14116 default:
14117 llvm_unreachable("Compare and swap of unknown size");
14118 case PPC::ATOMIC_CMP_SWAP_I8:
14119 LoadMnemonic = PPC::LBARX;
14120 StoreMnemonic = PPC::STBCX;
14121 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
14122 break;
14123 case PPC::ATOMIC_CMP_SWAP_I16:
14124 LoadMnemonic = PPC::LHARX;
14125 StoreMnemonic = PPC::STHCX;
14126 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
14127 break;
14128 case PPC::ATOMIC_CMP_SWAP_I32:
14129 LoadMnemonic = PPC::LWARX;
14130 StoreMnemonic = PPC::STWCX;
14131 break;
14132 case PPC::ATOMIC_CMP_SWAP_I64:
14133 LoadMnemonic = PPC::LDARX;
14134 StoreMnemonic = PPC::STDCX;
14135 break;
14136 }
14137 MachineRegisterInfo &RegInfo = F->getRegInfo();
14138 Register dest = MI.getOperand(0).getReg();
14139 Register ptrA = MI.getOperand(1).getReg();
14140 Register ptrB = MI.getOperand(2).getReg();
14141 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14142 Register oldval = MI.getOperand(3).getReg();
14143 Register newval = MI.getOperand(4).getReg();
14144 DebugLoc dl = MI.getDebugLoc();
14145
14146 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14147 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14148 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14149 F->insert(It, loop1MBB);
14150 F->insert(It, loop2MBB);
14151 F->insert(It, exitMBB);
14152 exitMBB->splice(exitMBB->begin(), BB,
14153 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14154 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14155
14156 // thisMBB:
14157 // ...
14158 // fallthrough --> loopMBB
14159 BB->addSuccessor(loop1MBB);
14160
14161 // loop1MBB:
14162 // l[bhwd]arx dest, ptr
14163 // cmp[wd] dest, oldval
14164 // bne- exitBB
14165 // loop2MBB:
14166 // st[bhwd]cx. newval, ptr
14167 // bne- loopMBB
14168 // b exitBB
14169 // exitBB:
14170 BB = loop1MBB;
14171 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14172 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14173 .addReg(dest)
14174 .addReg(oldval);
14175 BuildMI(BB, dl, TII->get(PPC::BCC))
14176 .addImm(PPC::PRED_NE)
14177 .addReg(CrReg)
14178 .addMBB(exitMBB);
14179 BB->addSuccessor(loop2MBB);
14180 BB->addSuccessor(exitMBB);
14181
14182 BB = loop2MBB;
14183 BuildMI(BB, dl, TII->get(StoreMnemonic))
14184 .addReg(newval)
14185 .addReg(ptrA)
14186 .addReg(ptrB);
14187 BuildMI(BB, dl, TII->get(PPC::BCC))
14188 .addImm(PPC::PRED_NE)
14189 .addReg(PPC::CR0)
14190 .addMBB(loop1MBB);
14191 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14192 BB->addSuccessor(loop1MBB);
14193 BB->addSuccessor(exitMBB);
14194
14195 // exitMBB:
14196 // ...
14197 BB = exitMBB;
14198 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14199 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14200 // We must use 64-bit registers for addresses when targeting 64-bit,
14201 // since we're actually doing arithmetic on them. Other registers
14202 // can be 32-bit.
14203 bool is64bit = Subtarget.isPPC64();
14204 bool isLittleEndian = Subtarget.isLittleEndian();
14205 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14206
14207 Register dest = MI.getOperand(0).getReg();
14208 Register ptrA = MI.getOperand(1).getReg();
14209 Register ptrB = MI.getOperand(2).getReg();
14210 Register oldval = MI.getOperand(3).getReg();
14211 Register newval = MI.getOperand(4).getReg();
14212 DebugLoc dl = MI.getDebugLoc();
14213
14214 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14215 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14216 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14217 F->insert(It, loop1MBB);
14218 F->insert(It, loop2MBB);
14219 F->insert(It, exitMBB);
14220 exitMBB->splice(exitMBB->begin(), BB,
14221 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14222 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14223
14224 MachineRegisterInfo &RegInfo = F->getRegInfo();
14225 const TargetRegisterClass *RC =
14226 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14227 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14228
14229 Register PtrReg = RegInfo.createVirtualRegister(RC);
14230 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
14231 Register ShiftReg =
14232 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
14233 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
14234 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
14235 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
14236 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
14237 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
14238 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
14239 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
14240 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
14241 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
14242 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
14243 Register Ptr1Reg;
14244 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
14245 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14246 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14247 // thisMBB:
14248 // ...
14249 // fallthrough --> loopMBB
14250 BB->addSuccessor(loop1MBB);
14251
14252 // The 4-byte load must be aligned, while a char or short may be
14253 // anywhere in the word. Hence all this nasty bookkeeping code.
14254 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14255 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14256 // xori shift, shift1, 24 [16]
14257 // rlwinm ptr, ptr1, 0, 0, 29
14258 // slw newval2, newval, shift
14259 // slw oldval2, oldval, shift
14260 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14261 // slw mask, mask2, shift
14262 // and newval3, newval2, mask
14263 // and oldval3, oldval2, mask
14264 // loop1MBB:
14265 // lwarx tmpDest, ptr
14266 // and tmp, tmpDest, mask
14267 // cmpw tmp, oldval3
14268 // bne- exitBB
14269 // loop2MBB:
14270 // andc tmp2, tmpDest, mask
14271 // or tmp4, tmp2, newval3
14272 // stwcx. tmp4, ptr
14273 // bne- loop1MBB
14274 // b exitBB
14275 // exitBB:
14276 // srw dest, tmpDest, shift
14277 if (ptrA != ZeroReg) {
14278 Ptr1Reg = RegInfo.createVirtualRegister(RC);
14279 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14280 .addReg(ptrA)
14281 .addReg(ptrB);
14282 } else {
14283 Ptr1Reg = ptrB;
14284 }
14285
14286 // We need to use a 32-bit subregister to avoid a register class mismatch in
14287 // 64-bit mode.
14288 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14289 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
14290 .addImm(3)
14291 .addImm(27)
14292 .addImm(is8bit ? 28 : 27);
14293 if (!isLittleEndian)
14294 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14295 .addReg(Shift1Reg)
14296 .addImm(is8bit ? 24 : 16);
14297 if (is64bit)
14298 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14299 .addReg(Ptr1Reg)
14300 .addImm(0)
14301 .addImm(61);
14302 else
14303 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14304 .addReg(Ptr1Reg)
14305 .addImm(0)
14306 .addImm(0)
14307 .addImm(29);
14308 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14309 .addReg(newval)
14310 .addReg(ShiftReg);
14311 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14312 .addReg(oldval)
14313 .addReg(ShiftReg);
14314 if (is8bit)
14315 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14316 else {
14317 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14318 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14319 .addReg(Mask3Reg)
14320 .addImm(65535);
14321 }
14322 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14323 .addReg(Mask2Reg)
14324 .addReg(ShiftReg);
14325 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14326 .addReg(NewVal2Reg)
14327 .addReg(MaskReg);
14328 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14329 .addReg(OldVal2Reg)
14330 .addReg(MaskReg);
14331
14332 BB = loop1MBB;
14333 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14334 .addReg(ZeroReg)
14335 .addReg(PtrReg);
14336 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14337 .addReg(TmpDestReg)
14338 .addReg(MaskReg);
14339 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
14340 .addReg(TmpReg)
14341 .addReg(OldVal3Reg);
14342 BuildMI(BB, dl, TII->get(PPC::BCC))
14343 .addImm(PPC::PRED_NE)
14344 .addReg(CrReg)
14345 .addMBB(exitMBB);
14346 BB->addSuccessor(loop2MBB);
14347 BB->addSuccessor(exitMBB);
14348
14349 BB = loop2MBB;
14350 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14351 .addReg(TmpDestReg)
14352 .addReg(MaskReg);
14353 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14354 .addReg(Tmp2Reg)
14355 .addReg(NewVal3Reg);
14356 BuildMI(BB, dl, TII->get(PPC::STWCX))
14357 .addReg(Tmp4Reg)
14358 .addReg(ZeroReg)
14359 .addReg(PtrReg);
14360 BuildMI(BB, dl, TII->get(PPC::BCC))
14361 .addImm(PPC::PRED_NE)
14362 .addReg(PPC::CR0)
14363 .addMBB(loop1MBB);
14364 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14365 BB->addSuccessor(loop1MBB);
14366 BB->addSuccessor(exitMBB);
14367
14368 // exitMBB:
14369 // ...
14370 BB = exitMBB;
14371 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14372 .addReg(TmpReg)
14373 .addReg(ShiftReg);
14374 } else if (MI.getOpcode() == PPC::FADDrtz) {
14375 // This pseudo performs an FADD with rounding mode temporarily forced
14376 // to round-to-zero. We emit this via custom inserter since the FPSCR
14377 // is not modeled at the SelectionDAG level.
14378 Register Dest = MI.getOperand(0).getReg();
14379 Register Src1 = MI.getOperand(1).getReg();
14380 Register Src2 = MI.getOperand(2).getReg();
14381 DebugLoc dl = MI.getDebugLoc();
14382
14383 MachineRegisterInfo &RegInfo = F->getRegInfo();
14384 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14385
14386 // Save FPSCR value.
14387 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14388
14389 // Set rounding mode to round-to-zero.
14390 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14391 .addImm(31)
14392 .addReg(PPC::RM, RegState::ImplicitDefine);
14393
14394 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14395 .addImm(30)
14396 .addReg(PPC::RM, RegState::ImplicitDefine);
14397
14398 // Perform addition.
14399 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14400 .addReg(Src1)
14401 .addReg(Src2);
14402 if (MI.getFlag(MachineInstr::NoFPExcept))
14403 MIB.setMIFlag(MachineInstr::NoFPExcept);
14404
14405 // Restore FPSCR value.
14406 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14407 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14408 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14409 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14410 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14411 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14412 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14413 ? PPC::ANDI8_rec
14414 : PPC::ANDI_rec;
14415 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14416 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14417
14418 MachineRegisterInfo &RegInfo = F->getRegInfo();
14419 Register Dest = RegInfo.createVirtualRegister(
14420 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14421
14422 DebugLoc Dl = MI.getDebugLoc();
14423 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14424 .addReg(MI.getOperand(1).getReg())
14425 .addImm(1);
14426 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14427 MI.getOperand(0).getReg())
14428 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14429 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14430 DebugLoc Dl = MI.getDebugLoc();
14431 MachineRegisterInfo &RegInfo = F->getRegInfo();
14432 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14433 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14434 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14435 MI.getOperand(0).getReg())
14436 .addReg(CRReg);
14437 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14438 DebugLoc Dl = MI.getDebugLoc();
14439 unsigned Imm = MI.getOperand(1).getImm();
14440 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14441 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14442 MI.getOperand(0).getReg())
14443 .addReg(PPC::CR0EQ);
14444 } else if (MI.getOpcode() == PPC::SETRNDi) {
14445 DebugLoc dl = MI.getDebugLoc();
14446 Register OldFPSCRReg = MI.getOperand(0).getReg();
14447
14448 // Save FPSCR value.
14449 if (MRI.use_empty(OldFPSCRReg))
14450 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14451 else
14452 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14453
14454 // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
14455 // the following settings:
14456 // 00 Round to nearest
14457 // 01 Round to 0
14458 // 10 Round to +inf
14459 // 11 Round to -inf
14460
14461 // When the operand is an immediate, use its two least significant bits to
14462 // set bits 62:63 of the FPSCR.
14463 unsigned Mode = MI.getOperand(1).getImm();
14464 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14465 .addImm(31)
14466 .addReg(PPC::RM, RegState::ImplicitDefine);
14467
14468 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14469 .addImm(30)
14470 .addReg(PPC::RM, RegState::ImplicitDefine);
14471 } else if (MI.getOpcode() == PPC::SETRND) {
14472 DebugLoc dl = MI.getDebugLoc();
14473
14474 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14475 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14476 // If the target doesn't have DirectMove, we should use stack to do the
14477 // conversion, because the target doesn't have the instructions like mtvsrd
14478 // or mfvsrd to do this conversion directly.
14479 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14480 if (Subtarget.hasDirectMove()) {
14481 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14482 .addReg(SrcReg);
14483 } else {
14484 // Use stack to do the register copy.
14485 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14486 MachineRegisterInfo &RegInfo = F->getRegInfo();
14487 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14488 if (RC == &PPC::F8RCRegClass) {
14489 // Copy register from F8RCRegClass to G8RCRegClass.
14490 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14491 "Unsupported RegClass.");
14492
14493 StoreOp = PPC::STFD;
14494 LoadOp = PPC::LD;
14495 } else {
14496 // Copy register from G8RCRegClass to F8RCRegClass.
14497 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14498 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14499 "Unsupported RegClass.");
14500 }
14501
14502 MachineFrameInfo &MFI = F->getFrameInfo();
14503 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14504
14505 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14506 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14507 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
14508 MFI.getObjectAlign(FrameIdx));
14509
14510 // Store the SrcReg into the stack.
14511 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14512 .addReg(SrcReg)
14513 .addImm(0)
14514 .addFrameIndex(FrameIdx)
14515 .addMemOperand(MMOStore);
14516
14517 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14518 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14519 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
14520 MFI.getObjectAlign(FrameIdx));
14521
14522 // Load from the stack where SrcReg is stored, and save to DestReg,
14523 // so we have done the RegClass conversion from RegClass::SrcReg to
14524 // RegClass::DestReg.
14525 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14526 .addImm(0)
14527 .addFrameIndex(FrameIdx)
14528 .addMemOperand(MMOLoad);
14529 }
14530 };
14531
14532 Register OldFPSCRReg = MI.getOperand(0).getReg();
14533
14534 // Save FPSCR value.
14535 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14536
14537 // When the operand is a GPRC register, use its two least significant bits
14538 // and the mtfsf instruction to set bits 62:63 of the FPSCR.
14539 //
14540 // copy OldFPSCRTmpReg, OldFPSCRReg
14541 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14542 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14543 // copy NewFPSCRReg, NewFPSCRTmpReg
14544 // mtfsf 255, NewFPSCRReg
14545 MachineOperand SrcOp = MI.getOperand(1);
14546 MachineRegisterInfo &RegInfo = F->getRegInfo();
14547 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14548
14549 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14550
14551 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14552 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14553
14554 // The first operand of INSERT_SUBREG should be a register that has
14555 // subregisters. Since we only care about its register class, we can use an
14556 // IMPLICIT_DEF register.
14557 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14558 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14559 .addReg(ImDefReg)
14560 .add(SrcOp)
14561 .addImm(1);
14562
14563 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14564 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14565 .addReg(OldFPSCRTmpReg)
14566 .addReg(ExtSrcReg)
14567 .addImm(0)
14568 .addImm(62);
14569
14570 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14571 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14572
14573 // The mask 255 means that bits 32:63 of NewFPSCRReg are put into bits
14574 // 32:63 of the FPSCR.
14575 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14576 .addImm(255)
14577 .addReg(NewFPSCRReg)
14578 .addImm(0)
14579 .addImm(0);
14580 } else if (MI.getOpcode() == PPC::SETFLM) {
14581 DebugLoc Dl = MI.getDebugLoc();
14582
14583 // Result of setflm is previous FPSCR content, so we need to save it first.
14584 Register OldFPSCRReg = MI.getOperand(0).getReg();
14585 if (MRI.use_empty(OldFPSCRReg))
14586 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14587 else
14588 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14589
14590 // Put bits in 32:63 to FPSCR.
14591 Register NewFPSCRReg = MI.getOperand(1).getReg();
14592 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14593 .addImm(255)
14594 .addReg(NewFPSCRReg)
14595 .addImm(0)
14596 .addImm(0);
14597 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14598 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14599 return emitProbedAlloca(MI, BB);
14600 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14601 DebugLoc DL = MI.getDebugLoc();
14602 Register Src = MI.getOperand(2).getReg();
14603 Register Lo = MI.getOperand(0).getReg();
14604 Register Hi = MI.getOperand(1).getReg();
14605 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14606 .addDef(Lo)
14607 .addUse(Src, {}, PPC::sub_gp8_x1);
14608 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14609 .addDef(Hi)
14610 .addUse(Src, {}, PPC::sub_gp8_x0);
14611 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14612 MI.getOpcode() == PPC::STQX_PSEUDO) {
14613 DebugLoc DL = MI.getDebugLoc();
14614 // Ptr is used as the ptr_rc_no_r0 part
14615 // of LQ/STQ's memory operand and holds the sum of RA and RB,
14616 // so it has to be g8rc_and_g8rc_nox0.
14617 Register Ptr =
14618 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14619 Register Val = MI.getOperand(0).getReg();
14620 Register RA = MI.getOperand(1).getReg();
14621 Register RB = MI.getOperand(2).getReg();
14622 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14623 BuildMI(*BB, MI, DL,
14624 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14625 : TII->get(PPC::STQ))
14626 .addReg(Val, getDefRegState(MI.getOpcode() == PPC::LQX_PSEUDO))
14627 .addImm(0)
14628 .addReg(Ptr);
14629 } else if (MI.getOpcode() == PPC::LWAT_PSEUDO ||
14630 MI.getOpcode() == PPC::LDAT_PSEUDO) {
14631 DebugLoc DL = MI.getDebugLoc();
14632 Register DstReg = MI.getOperand(0).getReg();
14633 Register PtrReg = MI.getOperand(1).getReg();
14634 Register ValReg = MI.getOperand(2).getReg();
14635 unsigned FC = MI.getOperand(3).getImm();
14636 bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO;
14637 Register Val64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14638 if (IsLwat)
14639 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::SUBREG_TO_REG), Val64)
14640 .addImm(0)
14641 .addReg(ValReg)
14642 .addImm(PPC::sub_32);
14643 else
14644 Val64 = ValReg;
14645
14646 Register G8rPair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14647 Register UndefG8r = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14648 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), UndefG8r);
14649 BuildMI(*BB, MI, DL, TII->get(PPC::REG_SEQUENCE), G8rPair)
14650 .addReg(UndefG8r)
14651 .addImm(PPC::sub_gp8_x0)
14652 .addReg(Val64)
14653 .addImm(PPC::sub_gp8_x1);
14654
14655 Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14656 BuildMI(*BB, MI, DL, TII->get(IsLwat ? PPC::LWAT : PPC::LDAT), PairResult)
14657 .addReg(G8rPair)
14658 .addReg(PtrReg)
14659 .addImm(FC);
14660 Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14661 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
14662 .addReg(PairResult, {}, PPC::sub_gp8_x0);
14663 if (IsLwat)
14664 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14665 .addReg(Result64, {}, PPC::sub_32);
14666 else
14667 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14668 .addReg(Result64);
14669 } else if (MI.getOpcode() == PPC::LWAT_COND_PSEUDO ||
14670 MI.getOpcode() == PPC::LDAT_COND_PSEUDO) {
14671 DebugLoc DL = MI.getDebugLoc();
14672 Register DstReg = MI.getOperand(0).getReg();
14673 Register PtrReg = MI.getOperand(1).getReg();
14674 unsigned FC = MI.getOperand(2).getImm();
14675 bool IsLwat_Cond = MI.getOpcode() == PPC::LWAT_COND_PSEUDO;
14676
14677 Register Pair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14678 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Pair);
14679
14680 Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14681 BuildMI(*BB, MI, DL, TII->get(IsLwat_Cond ? PPC::LWAT : PPC::LDAT),
14682 PairResult)
14683 .addReg(Pair)
14684 .addReg(PtrReg)
14685 .addImm(FC);
14686 Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14687 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
14688 .addReg(PairResult, {}, PPC::sub_gp8_x0);
14689 if (IsLwat_Cond)
14690 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14691 .addReg(Result64, {}, PPC::sub_32);
14692 else
14693 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14694 .addReg(Result64);
14695 } else {
14696 llvm_unreachable("Unexpected instr type to insert");
14697 }
14698
14699 MI.eraseFromParent(); // The pseudo instruction is gone now.
14700 return BB;
14701}
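The partword compare-and-swap expansion above hides its trickiest step in the rlwinm/xori/rlwinm triple: computing the aligned word address and the bit position of the narrow value inside that word. A minimal standalone C++ sketch of the same arithmetic (hypothetical helper, not part of this file) is:

// Sketch of the address bookkeeping done by the rlwinm/xori sequence in the
// partword compare-and-swap expansion; illustrative only.
#include <cstdint>

struct PartwordAddr {
  uint64_t WordAddr; // 4-byte-aligned address actually used by lwarx/stwcx.
  unsigned Shift;    // bit position of the narrow value inside that word
};

static PartwordAddr computePartwordAddr(uint64_t Addr, bool Is8Bit,
                                        bool IsLittleEndian) {
  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]: byte (or halfword) offset * 8.
  unsigned Shift = Is8Bit ? unsigned(Addr & 3) * 8 : unsigned(Addr & 2) * 8;
  // xori shift, shift1, 24 [16]: big-endian mirrors the shift amount, since
  // the lowest-addressed byte sits at the most significant end of the word.
  if (!IsLittleEndian)
    Shift ^= Is8Bit ? 24 : 16;
  // rlwinm ptr, ptr1, 0, 0, 29: clear the low two bits to get the word address.
  return {Addr & ~uint64_t(3), Shift};
}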
14702
14703//===----------------------------------------------------------------------===//
14704// Target Optimization Hooks
14705//===----------------------------------------------------------------------===//
14706
14707static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14708 // For the estimates, convergence is quadratic, so we essentially double the
14709 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14710 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14711 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
14712 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14713 if (VT.getScalarType() == MVT::f64)
14714 RefinementSteps++;
14715 return RefinementSteps;
14716}
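The step counts follow from the quadratic convergence noted in the comment: each Newton-Raphson iteration roughly doubles the number of correct bits, starting from about 5 bits (or 14 when hasRecipPrec()). A small self-contained C++ check of that arithmetic (illustrative only) is:

// Recompute the refinement-step counts from the convergence argument above.
#include <cstdio>

static int stepsNeeded(int StartBits, int TargetBits) {
  int Steps = 0;
  for (int Bits = StartBits; Bits < TargetBits; Bits *= 2)
    ++Steps;
  return Steps;
}

int main() {
  // 2^-5 estimate: 3 steps for float (23 bits), 4 for double (52 bits).
  std::printf("f32: %d, f64: %d\n", stepsNeeded(5, 23), stepsNeeded(5, 52));
  // 2^-14 estimate (hasRecipPrec): 1 step for float, 2 for double.
  std::printf("f32: %d, f64: %d\n", stepsNeeded(14, 23), stepsNeeded(14, 52));
  return 0;
}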
14717
14718SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14719 const DenormalMode &Mode) const {
14720 // We only have VSX Vector Test for software Square Root.
14721 EVT VT = Op.getValueType();
14722 if (!isTypeLegal(MVT::i1) ||
14723 (VT != MVT::f64 &&
14724 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14725 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
14726
14727 SDLoc DL(Op);
14728 // The output register of FTSQRT is CR field.
14729 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
14730 // ftsqrt BF,FRB
14731 // Let e_b be the unbiased exponent of the double-precision
14732 // floating-point operand in register FRB.
14733 // fe_flag is set to 1 if either of the following conditions occurs.
14734 // - The double-precision floating-point operand in register FRB is a zero,
14735 // a NaN, an infinity, or a negative value.
14736 // - e_b is less than or equal to -970.
14737 // Otherwise fe_flag is set to 0.
14738 // Both VSX and non-VSX versions would set the EQ bit in the CR if the number is
14739 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14740 // exponent is less than -970)
14741 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14742 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14743 FTSQRT, SRIdxVal),
14744 0);
14745}
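For reference, the fe_flag condition described in the comment can be written as an ordinary scalar predicate. This is only an illustrative sketch of the semantics, not code used by the lowering:

// Returns true when ftsqrt would set fe_flag, i.e. the input should bypass
// the estimate-plus-refinement path. Hypothetical helper for illustration.
#include <cmath>

static bool sqrtInputIsSpecial(double FRB) {
  if (FRB == 0.0 || std::isnan(FRB) || std::isinf(FRB) || std::signbit(FRB))
    return true;
  // std::ilogb returns the unbiased exponent e_b of a finite nonzero value.
  return std::ilogb(FRB) <= -970;
}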
14746
14747SDValue
14748PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14749 SelectionDAG &DAG) const {
14750 // We only have VSX Vector Square Root.
14751 EVT VT = Op.getValueType();
14752 if (VT != MVT::f64 &&
14753 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14754 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
14755
14756 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14757}
14758
14759SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14760 int Enabled, int &RefinementSteps,
14761 bool &UseOneConstNR,
14762 bool Reciprocal) const {
14763 EVT VT = Operand.getValueType();
14764 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14765 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14766 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14767 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14768 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14769 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14770
14771 // The Newton-Raphson computation with a single constant does not provide
14772 // enough accuracy on some CPUs.
14773 UseOneConstNR = !Subtarget.needsTwoConstNR();
14774 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14775 }
14776 return SDValue();
14777}
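The returned FRSQRTE node is only the starting estimate; the RefinementSteps reported above drive a generic Newton-Raphson loop built by the DAG combiner. A scalar sketch of that refinement, assuming the standard reciprocal-square-root iteration, is:

// x_{n+1} = x_n * (1.5 - 0.5 * a * x_n * x_n); illustrative only.
static double refineRsqrt(double A, double Estimate, int Steps) {
  double X = Estimate;
  for (int I = 0; I < Steps; ++I)
    X = X * (1.5 - 0.5 * A * X * X);
  return X; // approximately 1 / sqrt(A)
}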
14778
14779SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14780 int Enabled,
14781 int &RefinementSteps) const {
14782 EVT VT = Operand.getValueType();
14783 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14784 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14785 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14786 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14787 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14788 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14789 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14790 }
14791 return SDValue();
14792}
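The FRE path is refined the same way, using the reciprocal iteration; again this is an illustrative scalar sketch rather than code in this file:

// x_{n+1} = x_n * (2 - a * x_n); illustrative only.
static double refineRecip(double A, double Estimate, int Steps) {
  double X = Estimate;
  for (int I = 0; I < Steps; ++I)
    X = X * (2.0 - A * X);
  return X; // approximately 1 / A
}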
14793
14794 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14795 // Note: This functionality is used only when arcp is enabled, and
14796 // on cores with reciprocal estimates (which are used when arcp is
14797 // enabled for division), this functionality is redundant with the default
14798 // combiner logic (once the division -> reciprocal/multiply transformation
14799 // has taken place). As a result, this matters more for older cores than for
14800 // newer ones.
14801
14802 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14803 // reciprocal if there are two or more FDIVs (for embedded cores with only
14804 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
14805 switch (Subtarget.getCPUDirective()) {
14806 default:
14807 return 3;
14808 case PPC::DIR_440:
14809 case PPC::DIR_A2:
14810 case PPC::DIR_E500:
14811 case PPC::DIR_E500mc:
14812 case PPC::DIR_E5500:
14813 return 2;
14814 }
14815}
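In effect, once the number of divisions by a common divisor reaches the threshold returned above, the combiner rewrites them as one reciprocal plus multiplies. A plain C++ illustration of the rewritten form (under fast-math/arcp assumptions; the names are hypothetical) is:

// One division replaces N divisions; each element then costs one multiply.
static void divideAll(float *Out, const float *In, unsigned N, float D) {
  float Recip = 1.0f / D;
  for (unsigned I = 0; I != N; ++I)
    Out[I] = In[I] * Recip;
}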
14816
14817// isConsecutiveLSLoc needs to work even if all adds have not yet been
14818// collapsed, and so we need to look through chains of them.
14819 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14820 int64_t& Offset, SelectionDAG &DAG) {
14821 if (DAG.isBaseWithConstantOffset(Loc)) {
14822 Base = Loc.getOperand(0);
14823 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14824
14825 // The base might itself be a base plus an offset, and if so, accumulate
14826 // that as well.
14827 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14828 }
14829}
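As an illustration of what this accumulation does, a DAG shaped like (add (add X, 8), 16) yields Base = X and Offset = 24. A toy C++ analogue over a hypothetical node type (not DAG code) is:

struct AddNode {
  const AddNode *Base = nullptr; // non-null when this node is (Base + Offset)
  long Offset = 0;               // constant addend for non-leaf nodes
};

// Accumulate nested constant offsets, mirroring the recursion above but
// written as a loop: for (add (add X, 8), 16) this returns X with Offset 24.
static const AddNode *peelConstantOffsets(const AddNode *Loc, long &Offset) {
  while (Loc->Base) {
    Offset += Loc->Offset;
    Loc = Loc->Base;
  }
  return Loc;
}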
14830
14831 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14832 unsigned Bytes, int Dist,
14833 SelectionDAG &DAG) {
14834 if (VT.getSizeInBits() / 8 != Bytes)
14835 return false;
14836
14837 SDValue BaseLoc = Base->getBasePtr();
14838 if (Loc.getOpcode() == ISD::FrameIndex) {
14839 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14840 return false;
14841 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14842 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
14843 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14844 int FS = MFI.getObjectSize(FI);
14845 int BFS = MFI.getObjectSize(BFI);
14846 if (FS != BFS || FS != (int)Bytes) return false;
14847 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14848 }
14849
14850 SDValue Base1 = Loc, Base2 = BaseLoc;
14851 int64_t Offset1 = 0, Offset2 = 0;
14852 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14853 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14854 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14855 return true;
14856
14857 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14858 const GlobalValue *GV1 = nullptr;
14859 const GlobalValue *GV2 = nullptr;
14860 Offset1 = 0;
14861 Offset2 = 0;
14862 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14863 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14864 if (isGA1 && isGA2 && GV1 == GV2)
14865 return Offset1 == (Offset2 + Dist*Bytes);
14866 return false;
14867}
14868
14869// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14870// not enforce equality of the chain operands.
14871 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14872 unsigned Bytes, int Dist,
14873 SelectionDAG &DAG) {
14874 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
14875 EVT VT = LS->getMemoryVT();
14876 SDValue Loc = LS->getBasePtr();
14877 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14878 }
14879
14880 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14881 EVT VT;
14882 switch (N->getConstantOperandVal(1)) {
14883 default: return false;
14884 case Intrinsic::ppc_altivec_lvx:
14885 case Intrinsic::ppc_altivec_lvxl:
14886 case Intrinsic::ppc_vsx_lxvw4x:
14887 case Intrinsic::ppc_vsx_lxvw4x_be:
14888 VT = MVT::v4i32;
14889 break;
14890 case Intrinsic::ppc_vsx_lxvd2x:
14891 case Intrinsic::ppc_vsx_lxvd2x_be:
14892 VT = MVT::v2f64;
14893 break;
14894 case Intrinsic::ppc_altivec_lvebx:
14895 VT = MVT::i8;
14896 break;
14897 case Intrinsic::ppc_altivec_lvehx:
14898 VT = MVT::i16;
14899 break;
14900 case Intrinsic::ppc_altivec_lvewx:
14901 VT = MVT::i32;
14902 break;
14903 }
14904
14905 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14906 }
14907
14908 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14909 EVT VT;
14910 switch (N->getConstantOperandVal(1)) {
14911 default: return false;
14912 case Intrinsic::ppc_altivec_stvx:
14913 case Intrinsic::ppc_altivec_stvxl:
14914 case Intrinsic::ppc_vsx_stxvw4x:
14915 VT = MVT::v4i32;
14916 break;
14917 case Intrinsic::ppc_vsx_stxvd2x:
14918 VT = MVT::v2f64;
14919 break;
14920 case Intrinsic::ppc_vsx_stxvw4x_be:
14921 VT = MVT::v4i32;
14922 break;
14923 case Intrinsic::ppc_vsx_stxvd2x_be:
14924 VT = MVT::v2f64;
14925 break;
14926 case Intrinsic::ppc_altivec_stvebx:
14927 VT = MVT::i8;
14928 break;
14929 case Intrinsic::ppc_altivec_stvehx:
14930 VT = MVT::i16;
14931 break;
14932 case Intrinsic::ppc_altivec_stvewx:
14933 VT = MVT::i32;
14934 break;
14935 }
14936
14937 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14938 }
14939
14940 return false;
14941}
14942
14943 // Return true if there is a nearby consecutive load to the one provided
14944 // (regardless of alignment). We search up and down the chain, looking through
14945// token factors and other loads (but nothing else). As a result, a true result
14946// indicates that it is safe to create a new consecutive load adjacent to the
14947// load provided.
14948 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14949 SDValue Chain = LD->getChain();
14950 EVT VT = LD->getMemoryVT();
14951
14952 SmallPtrSet<SDNode *, 16> LoadRoots;
14953 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14954 SmallPtrSet<SDNode *, 16> Visited;
14955
14956 // First, search up the chain, branching to follow all token-factor operands.
14957 // If we find a consecutive load, then we're done, otherwise, record all
14958 // nodes just above the top-level loads and token factors.
14959 while (!Queue.empty()) {
14960 SDNode *ChainNext = Queue.pop_back_val();
14961 if (!Visited.insert(ChainNext).second)
14962 continue;
14963
14964 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14965 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14966 return true;
14967
14968 if (!Visited.count(ChainLD->getChain().getNode()))
14969 Queue.push_back(ChainLD->getChain().getNode());
14970 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14971 for (const SDUse &O : ChainNext->ops())
14972 if (!Visited.count(O.getNode()))
14973 Queue.push_back(O.getNode());
14974 } else
14975 LoadRoots.insert(ChainNext);
14976 }
14977
14978 // Second, search down the chain, starting from the top-level nodes recorded
14979 // in the first phase. These top-level nodes are the nodes just above all
14980 // loads and token factors. Starting with their uses, recursively look through
14981 // all loads (just the chain uses) and token factors to find a consecutive
14982 // load.
14983 Visited.clear();
14984 Queue.clear();
14985
14986 for (SDNode *I : LoadRoots) {
14987 Queue.push_back(I);
14988
14989 while (!Queue.empty()) {
14990 SDNode *LoadRoot = Queue.pop_back_val();
14991 if (!Visited.insert(LoadRoot).second)
14992 continue;
14993
14994 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14995 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14996 return true;
14997
14998 for (SDNode *U : LoadRoot->users())
14999 if (((isa<MemSDNode>(U) &&
15000 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
15001 U->getOpcode() == ISD::TokenFactor) &&
15002 !Visited.count(U))
15003 Queue.push_back(U);
15004 }
15005 }
15006
15007 return false;
15008}
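Both phases above are instances of the same worklist-plus-visited-set walk; a generic C++ sketch of that pattern over a toy node type (illustrative only, the real code walks SDNode chain operands and users) is:

#include <unordered_set>
#include <vector>

struct Node {
  std::vector<Node *> Next;
  bool Interesting = false;
};

static bool reachesInteresting(Node *Start) {
  std::vector<Node *> Queue{Start};
  std::unordered_set<Node *> Visited;
  while (!Queue.empty()) {
    Node *N = Queue.back();
    Queue.pop_back();
    if (!Visited.insert(N).second)
      continue; // already seen, skip
    if (N->Interesting)
      return true;
    for (Node *Succ : N->Next)
      if (!Visited.count(Succ))
        Queue.push_back(Succ);
  }
  return false;
}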
15009
15010/// This function is called when we have proved that a SETCC node can be replaced
15011/// by subtraction (and other supporting instructions) so that the result of
15012/// comparison is kept in a GPR instead of CR. This function is purely for
15013/// codegen purposes and has some flags to guide the codegen process.
15014static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
15015 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
15016 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15017
15018 // Zero extend the operands to the largest legal integer. Originally, they
15019 // must be of a strictly smaller size.
15020 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
15021 DAG.getConstant(Size, DL, MVT::i32));
15022 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
15023 DAG.getConstant(Size, DL, MVT::i32));
15024
15025 // Swap if needed. Depends on the condition code.
15026 if (Swap)
15027 std::swap(Op0, Op1);
15028
15029 // Subtract extended integers.
15030 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
15031
15032 // Move the sign bit to the least significant position and zero out the rest.
15033 // Now the least significant bit carries the result of original comparison.
15034 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
15035 DAG.getConstant(Size - 1, DL, MVT::i32));
15036 auto Final = Shifted;
15037
15038 // Complement the result if needed. Based on the condition code.
15039 if (Complement)
15040 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
15041 DAG.getConstant(1, DL, MVT::i64));
15042
15043 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
15044}
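A scalar model of the trick, assuming 32-bit unsigned operands and a 64-bit largest legal integer: the sign bit of the extended difference is exactly the unsigned less-than result, and the Complement and Swap flags derive the other predicates from it.

#include <cstdint>

static uint64_t setULT(uint32_t A, uint32_t B) {
  // a <u b  <=>  sign bit of (zext a - zext b)
  return (uint64_t(A) - uint64_t(B)) >> 63;
}

static uint64_t setUGE(uint32_t A, uint32_t B) {
  // a >=u b is the complement of a <u b.
  return setULT(A, B) ^ 1;
}

SETUGT and SETULE are obtained the same way after swapping the operands before the subtraction, matching the switch in ConvertSETCCToSubtract below.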
15045
15046SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
15047 DAGCombinerInfo &DCI) const {
15048 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15049
15050 SelectionDAG &DAG = DCI.DAG;
15051 SDLoc DL(N);
15052
15053 // Size of integers being compared has a critical role in the following
15054 // analysis, so we prefer to do this when all types are legal.
15055 if (!DCI.isAfterLegalizeDAG())
15056 return SDValue();
15057
15058 // If all users of SETCC extend its value to a legal integer type
15059 // then we replace SETCC with a subtraction
15060 for (const SDNode *U : N->users())
15061 if (U->getOpcode() != ISD::ZERO_EXTEND)
15062 return SDValue();
15063
15064 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15065 auto OpSize = N->getOperand(0).getValueSizeInBits();
15066
15066
15067 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
15068
15069 if (OpSize < Size) {
15070 switch (CC) {
15071 default: break;
15072 case ISD::SETULT:
15073 return generateEquivalentSub(N, Size, false, false, DL, DAG);
15074 case ISD::SETULE:
15075 return generateEquivalentSub(N, Size, true, true, DL, DAG);
15076 case ISD::SETUGT:
15077 return generateEquivalentSub(N, Size, false, true, DL, DAG);
15078 case ISD::SETUGE:
15079 return generateEquivalentSub(N, Size, true, false, DL, DAG);
15080 }
15081 }
15082
15083 return SDValue();
15084}
15085
15086SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
15087 DAGCombinerInfo &DCI) const {
15088 SelectionDAG &DAG = DCI.DAG;
15089 SDLoc dl(N);
15090
15091 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
15092 // If we're tracking CR bits, we need to be careful that we don't have:
15093 // trunc(binary-ops(zext(x), zext(y)))
15094 // or
15095 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
15096 // such that we're unnecessarily moving things into GPRs when it would be
15097 // better to keep them in CR bits.
15098
15099 // Note that trunc here can be an actual i1 trunc, or can be the effective
15100 // truncation that comes from a setcc or select_cc.
15101 if (N->getOpcode() == ISD::TRUNCATE &&
15102 N->getValueType(0) != MVT::i1)
15103 return SDValue();
15104
15105 if (N->getOperand(0).getValueType() != MVT::i32 &&
15106 N->getOperand(0).getValueType() != MVT::i64)
15107 return SDValue();
15108
15109 if (N->getOpcode() == ISD::SETCC ||
15110 N->getOpcode() == ISD::SELECT_CC) {
15111 // If we're looking at a comparison, then we need to make sure that the
15112 // high bits (all except for the first) don't affect the result.
15113 ISD::CondCode CC =
15114 cast<CondCodeSDNode>(N->getOperand(
15115 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15116 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
15117
15118 if (ISD::isSignedIntSetCC(CC)) {
15119 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
15120 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
15121 return SDValue();
15122 } else if (ISD::isUnsignedIntSetCC(CC)) {
15123 if (!DAG.MaskedValueIsZero(N->getOperand(0),
15124 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
15125 !DAG.MaskedValueIsZero(N->getOperand(1),
15126 APInt::getHighBitsSet(OpBits, OpBits-1)))
15127 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15128 : SDValue());
15129 } else {
15130 // This is neither a signed nor an unsigned comparison, just make sure
15131 // that the high bits are equal.
15132 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
15133 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
15134
15135 // We don't really care about what is known about the first bit (if
15136 // anything), so pretend that it is known zero for both to ensure they can
15137 // be compared as constants.
15138 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
15139 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
15140
15141 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15142 Op1Known.getConstant() != Op2Known.getConstant())
15143 return SDValue();
15144 }
15145 }
15146
15147 // We now know that the higher-order bits are irrelevant, we just need to
15148 // make sure that all of the intermediate operations are bit operations, and
15149 // all inputs are extensions.
15150 if (N->getOperand(0).getOpcode() != ISD::AND &&
15151 N->getOperand(0).getOpcode() != ISD::OR &&
15152 N->getOperand(0).getOpcode() != ISD::XOR &&
15153 N->getOperand(0).getOpcode() != ISD::SELECT &&
15154 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
15155 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
15156 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
15157 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
15158 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
15159 return SDValue();
15160
15161 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15162 N->getOperand(1).getOpcode() != ISD::AND &&
15163 N->getOperand(1).getOpcode() != ISD::OR &&
15164 N->getOperand(1).getOpcode() != ISD::XOR &&
15165 N->getOperand(1).getOpcode() != ISD::SELECT &&
15166 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
15167 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
15168 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
15169 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
15170 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
15171 return SDValue();
15172
15173 SmallVector<SDValue, 4> Inputs;
15174 SmallVector<SDValue, 8> BinOps, PromOps;
15175 SmallPtrSet<SDNode *, 16> Visited;
15176
15177 for (unsigned i = 0; i < 2; ++i) {
15178 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15179 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15180 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15181 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15182 isa<ConstantSDNode>(N->getOperand(i)))
15183 Inputs.push_back(N->getOperand(i));
15184 else
15185 BinOps.push_back(N->getOperand(i));
15186
15187 if (N->getOpcode() == ISD::TRUNCATE)
15188 break;
15189 }
15190
15191 // Visit all inputs, collect all binary operations (and, or, xor and
15192 // select) that are all fed by extensions.
15193 while (!BinOps.empty()) {
15194 SDValue BinOp = BinOps.pop_back_val();
15195
15196 if (!Visited.insert(BinOp.getNode()).second)
15197 continue;
15198
15199 PromOps.push_back(BinOp);
15200
15201 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15202 // The condition of the select is not promoted.
15203 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15204 continue;
15205 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15206 continue;
15207
15208 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15209 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15210 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15211 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15212 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15213 Inputs.push_back(BinOp.getOperand(i));
15214 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15215 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15216 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15217 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15218 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15219 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15220 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15221 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15222 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15223 BinOps.push_back(BinOp.getOperand(i));
15224 } else {
15225 // We have an input that is not an extension or another binary
15226 // operation; we'll abort this transformation.
15227 return SDValue();
15228 }
15229 }
15230 }
15231
15232 // Make sure that this is a self-contained cluster of operations (which
15233 // is not quite the same thing as saying that everything has only one
15234 // use).
15235 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15236 if (isa<ConstantSDNode>(Inputs[i]))
15237 continue;
15238
15239 for (const SDNode *User : Inputs[i].getNode()->users()) {
15240 if (User != N && !Visited.count(User))
15241 return SDValue();
15242
15243 // Make sure that we're not going to promote the non-output-value
15244 // operand(s) or SELECT or SELECT_CC.
15245 // FIXME: Although we could sometimes handle this, and it does occur in
15246 // practice that one of the condition inputs to the select is also one of
15247 // the outputs, we currently can't deal with this.
15248 if (User->getOpcode() == ISD::SELECT) {
15249 if (User->getOperand(0) == Inputs[i])
15250 return SDValue();
15251 } else if (User->getOpcode() == ISD::SELECT_CC) {
15252 if (User->getOperand(0) == Inputs[i] ||
15253 User->getOperand(1) == Inputs[i])
15254 return SDValue();
15255 }
15256 }
15257 }
15258
15259 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15260 for (const SDNode *User : PromOps[i].getNode()->users()) {
15261 if (User != N && !Visited.count(User))
15262 return SDValue();
15263
15264 // Make sure that we're not going to promote the non-output-value
15265 // operand(s) or SELECT or SELECT_CC.
15266 // FIXME: Although we could sometimes handle this, and it does occur in
15267 // practice that one of the condition inputs to the select is also one of
15268 // the outputs, we currently can't deal with this.
15269 if (User->getOpcode() == ISD::SELECT) {
15270 if (User->getOperand(0) == PromOps[i])
15271 return SDValue();
15272 } else if (User->getOpcode() == ISD::SELECT_CC) {
15273 if (User->getOperand(0) == PromOps[i] ||
15274 User->getOperand(1) == PromOps[i])
15275 return SDValue();
15276 }
15277 }
15278 }
15279
15280 // Replace all inputs with the extension operand.
15281 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15282 // Constants may have users outside the cluster of to-be-promoted nodes,
15283 // and so we need to replace those as we do the promotions.
15284 if (isa<ConstantSDNode>(Inputs[i]))
15285 continue;
15286 else
15287 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
15288 }
15289
15290 std::list<HandleSDNode> PromOpHandles;
15291 for (auto &PromOp : PromOps)
15292 PromOpHandles.emplace_back(PromOp);
15293
15294 // Replace all operations (these are all the same, but have a different
15295 // (i1) return type). DAG.getNode will validate that the types of
15296 // a binary operator match, so go through the list in reverse so that
15297 // we've likely promoted both operands first. Any intermediate truncations or
15298 // extensions disappear.
15299 while (!PromOpHandles.empty()) {
15300 SDValue PromOp = PromOpHandles.back().getValue();
15301 PromOpHandles.pop_back();
15302
15303 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15304 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15305 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15306 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15307 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
15308 PromOp.getOperand(0).getValueType() != MVT::i1) {
15309 // The operand is not yet ready (see comment below).
15310 PromOpHandles.emplace_front(PromOp);
15311 continue;
15312 }
15313
15314 SDValue RepValue = PromOp.getOperand(0);
15315 if (isa<ConstantSDNode>(RepValue))
15316 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
15317
15318 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
15319 continue;
15320 }
15321
15322 unsigned C;
15323 switch (PromOp.getOpcode()) {
15324 default: C = 0; break;
15325 case ISD::SELECT: C = 1; break;
15326 case ISD::SELECT_CC: C = 2; break;
15327 }
15328
15329 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15330 PromOp.getOperand(C).getValueType() != MVT::i1) ||
15331 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15332 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
15333 // The to-be-promoted operands of this node have not yet been
15334 // promoted (this should be rare because we're going through the
15335 // list backward, but if one of the operands has several users in
15336 // this cluster of to-be-promoted nodes, it is possible).
15337 PromOpHandles.emplace_front(PromOp);
15338 continue;
15339 }
15340
15341 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15342
15343 // If there are any constant inputs, make sure they're replaced now.
15344 for (unsigned i = 0; i < 2; ++i)
15345 if (isa<ConstantSDNode>(Ops[C+i]))
15346 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
15347
15348 DAG.ReplaceAllUsesOfValueWith(PromOp,
15349 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
15350 }
15351
15352 // Now we're left with the initial truncation itself.
15353 if (N->getOpcode() == ISD::TRUNCATE)
15354 return N->getOperand(0);
15355
15356 // Otherwise, this is a comparison. The operands to be compared have just
15357 // changed type (to i1), but everything else is the same.
15358 return SDValue(N, 0);
15359}
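Expressed on ordinary C++ booleans, the pattern this combine removes looks like the following (illustrative only; the real transformation operates on i1 SelectionDAG values so that they can stay in CR bits):

static bool before(bool X, bool Y) {
  unsigned ZX = X, ZY = Y; // zext i1 -> i32
  unsigned T = ZX & ZY;    // binary op on the extended values
  return T & 1;            // trunc i32 -> i1
}

static bool after(bool X, bool Y) {
  return X && Y;           // the same value, computed entirely on i1
}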
15360
15361SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15362 DAGCombinerInfo &DCI) const {
15363 SelectionDAG &DAG = DCI.DAG;
15364 SDLoc dl(N);
15365
15366 // If we're tracking CR bits, we need to be careful that we don't have:
15367 // zext(binary-ops(trunc(x), trunc(y)))
15368 // or
15369 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
15370 // such that we're unnecessarily moving things into CR bits that can more
15371 // efficiently stay in GPRs. Note that if we're not certain that the high
15372 // bits are set as required by the final extension, we still may need to do
15373 // some masking to get the proper behavior.
15374
15375 // This same functionality is important on PPC64 when dealing with
15376 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15377 // the return values of functions. Because it is so similar, it is handled
15378 // here as well.
15379
15380 if (N->getValueType(0) != MVT::i32 &&
15381 N->getValueType(0) != MVT::i64)
15382 return SDValue();
15383
15384 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15385 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15386 return SDValue();
15387
15388 if (N->getOperand(0).getOpcode() != ISD::AND &&
15389 N->getOperand(0).getOpcode() != ISD::OR &&
15390 N->getOperand(0).getOpcode() != ISD::XOR &&
15391 N->getOperand(0).getOpcode() != ISD::SELECT &&
15392 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
15393 return SDValue();
15394
15395 SmallVector<SDValue, 4> Inputs;
15396 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
15397 SmallPtrSet<SDNode *, 16> Visited;
15398
15399 // Visit all inputs, collect all binary operations (and, or, xor and
15400 // select) that are all fed by truncations.
15401 while (!BinOps.empty()) {
15402 SDValue BinOp = BinOps.pop_back_val();
15403
15404 if (!Visited.insert(BinOp.getNode()).second)
15405 continue;
15406
15407 PromOps.push_back(BinOp);
15408
15409 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15410 // The condition of the select is not promoted.
15411 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15412 continue;
15413 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15414 continue;
15415
15416 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15417 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15418 Inputs.push_back(BinOp.getOperand(i));
15419 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15420 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15421 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15422 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15423 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15424 BinOps.push_back(BinOp.getOperand(i));
15425 } else {
15426 // We have an input that is not a truncation or another binary
15427 // operation; we'll abort this transformation.
15428 return SDValue();
15429 }
15430 }
15431 }
15432
15433 // The operands of a select that must be truncated when the select is
15434 // promoted because the operand is actually part of the to-be-promoted set.
15435 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15436
15437 // Make sure that this is a self-contained cluster of operations (which
15438 // is not quite the same thing as saying that everything has only one
15439 // use).
15440 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15441 if (isa<ConstantSDNode>(Inputs[i]))
15442 continue;
15443
15444 for (SDNode *User : Inputs[i].getNode()->users()) {
15445 if (User != N && !Visited.count(User))
15446 return SDValue();
15447
15448 // If we're going to promote the non-output-value operand(s) or SELECT or
15449 // SELECT_CC, record them for truncation.
15450 if (User->getOpcode() == ISD::SELECT) {
15451 if (User->getOperand(0) == Inputs[i])
15452 SelectTruncOp[0].insert(std::make_pair(User,
15453 User->getOperand(0).getValueType()));
15454 } else if (User->getOpcode() == ISD::SELECT_CC) {
15455 if (User->getOperand(0) == Inputs[i])
15456 SelectTruncOp[0].insert(std::make_pair(User,
15457 User->getOperand(0).getValueType()));
15458 if (User->getOperand(1) == Inputs[i])
15459 SelectTruncOp[1].insert(std::make_pair(User,
15460 User->getOperand(1).getValueType()));
15461 }
15462 }
15463 }
15464
15465 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15466 for (SDNode *User : PromOps[i].getNode()->users()) {
15467 if (User != N && !Visited.count(User))
15468 return SDValue();
15469
15470 // If we're going to promote the non-output-value operand(s) or SELECT or
15471 // SELECT_CC, record them for truncation.
15472 if (User->getOpcode() == ISD::SELECT) {
15473 if (User->getOperand(0) == PromOps[i])
15474 SelectTruncOp[0].insert(std::make_pair(User,
15475 User->getOperand(0).getValueType()));
15476 } else if (User->getOpcode() == ISD::SELECT_CC) {
15477 if (User->getOperand(0) == PromOps[i])
15478 SelectTruncOp[0].insert(std::make_pair(User,
15479 User->getOperand(0).getValueType()));
15480 if (User->getOperand(1) == PromOps[i])
15481 SelectTruncOp[1].insert(std::make_pair(User,
15482 User->getOperand(1).getValueType()));
15483 }
15484 }
15485 }
15486
15487 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
15488 bool ReallyNeedsExt = false;
15489 if (N->getOpcode() != ISD::ANY_EXTEND) {
15490 // If all of the inputs are not already sign/zero extended, then
15491 // we'll still need to do that at the end.
15492 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15493 if (isa<ConstantSDNode>(Inputs[i]))
15494 continue;
15495
15496 unsigned OpBits =
15497 Inputs[i].getOperand(0).getValueSizeInBits();
15498 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15499
15500 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15501 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
15502 APInt::getHighBitsSet(OpBits,
15503 OpBits-PromBits))) ||
15504 (N->getOpcode() == ISD::SIGN_EXTEND &&
15505 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
15506 (OpBits-(PromBits-1)))) {
15507 ReallyNeedsExt = true;
15508 break;
15509 }
15510 }
15511 }
15512
15513 // Convert PromOps to handles before doing any RAUW operations, as these
15514 // may CSE with existing nodes, deleting the originals.
15515 std::list<HandleSDNode> PromOpHandles;
15516 for (auto &PromOp : PromOps)
15517 PromOpHandles.emplace_back(PromOp);
15518
15519 // Replace all inputs, either with the truncation operand, or a
15520 // truncation or extension to the final output type.
15521 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15522 // Constant inputs need to be replaced with the to-be-promoted nodes that
15523 // use them because they might have users outside of the cluster of
15524 // promoted nodes.
15525 if (isa<ConstantSDNode>(Inputs[i]))
15526 continue;
15527
15528 SDValue InSrc = Inputs[i].getOperand(0);
15529 if (Inputs[i].getValueType() == N->getValueType(0))
15530 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
15531 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15532 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15533 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
15534 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15535 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15536 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
15537 else
15538 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15539 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
15540 }
15541
15542 // Replace all operations (these are all the same, but have a different
15543 // (promoted) return type). DAG.getNode will validate that the types of
15544 // a binary operator match, so go through the list in reverse so that
15545 // we've likely promoted both operands first.
15546 while (!PromOpHandles.empty()) {
15547 SDValue PromOp = PromOpHandles.back().getValue();
15548 PromOpHandles.pop_back();
15549
15550 unsigned C;
15551 switch (PromOp.getOpcode()) {
15552 default: C = 0; break;
15553 case ISD::SELECT: C = 1; break;
15554 case ISD::SELECT_CC: C = 2; break;
15555 }
15556
15557 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15558 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
15559 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15560 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
15561 // The to-be-promoted operands of this node have not yet been
15562 // promoted (this should be rare because we're going through the
15563 // list backward, but if one of the operands has several users in
15564 // this cluster of to-be-promoted nodes, it is possible).
15565 PromOpHandles.emplace_front(PromOp);
15566 continue;
15567 }
15568
15569 // For SELECT and SELECT_CC nodes, we do a similar check for any
15570 // to-be-promoted comparison inputs.
15571 if (PromOp.getOpcode() == ISD::SELECT ||
15572 PromOp.getOpcode() == ISD::SELECT_CC) {
15573 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
15574 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
15575 (SelectTruncOp[1].count(PromOp.getNode()) &&
15576 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
15577 PromOpHandles.emplace_front(PromOp);
15578 continue;
15579 }
15580 }
15581
15582    SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15583
15584 // If this node has constant inputs, then they'll need to be promoted here.
15585 for (unsigned i = 0; i < 2; ++i) {
15586 if (!isa<ConstantSDNode>(Ops[C+i]))
15587 continue;
15588 if (Ops[C+i].getValueType() == N->getValueType(0))
15589 continue;
15590
15591 if (N->getOpcode() == ISD::SIGN_EXTEND)
15592 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15593 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15594 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15595 else
15596 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15597 }
15598
15599 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15600 // truncate them again to the original value type.
15601 if (PromOp.getOpcode() == ISD::SELECT ||
15602 PromOp.getOpcode() == ISD::SELECT_CC) {
15603 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
15604 if (SI0 != SelectTruncOp[0].end())
15605 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
15606 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
15607 if (SI1 != SelectTruncOp[1].end())
15608 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
15609 }
15610
15611 DAG.ReplaceAllUsesOfValueWith(PromOp,
15612 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
15613 }
15614
15615 // Now we're left with the initial extension itself.
15616 if (!ReallyNeedsExt)
15617 return N->getOperand(0);
15618
15619 // To zero extend, just mask off everything except for the first bit (in the
15620 // i1 case).
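  // For example, an i1 value promoted in an i32 register simply becomes
  // (and X, 1), since getLowBitsSet(32, 1) produces the mask 0x1.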
15621 if (N->getOpcode() == ISD::ZERO_EXTEND)
15622 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
15623                       DAG.getConstant(APInt::getLowBitsSet(
15624 N->getValueSizeInBits(0), PromBits),
15625 dl, N->getValueType(0)));
15626
15627 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15628 "Invalid extension type");
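  // The sign extension below is emulated by shifting the promoted bits all the
  // way left and arithmetic-shifting them back right; e.g. an i1 value promoted
  // in an i32 becomes (sra (shl X, 31), 31), replicating bit 0 across the result.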
15629 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
15630 SDValue ShiftCst =
15631 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
15632 return DAG.getNode(
15633 ISD::SRA, dl, N->getValueType(0),
15634 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
15635 ShiftCst);
15636}
15637
15638// Check whether both operands of an i128 compare can become v16i8 loads for vcmpequb.
15639static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS) {
15640
15641 auto isValidForConvert = [](SDValue &Operand) {
15642 if (!Operand.hasOneUse())
15643 return false;
15644
15645 if (Operand.getValueType() != MVT::i128)
15646 return false;
15647
15648 if (Operand.getOpcode() == ISD::Constant)
15649 return true;
15650
15651 auto *LoadNode = dyn_cast<LoadSDNode>(Operand);
15652 if (!LoadNode)
15653 return false;
15654
15655    // If the memory operation is volatile, do not perform any
15656    // optimization or transformation. Volatile operations must be preserved
15657    // as written to ensure correct program behavior, so return false to
15658    // indicate that no action should be taken.
15659
15660 if (LoadNode->isVolatile())
15661 return false;
15662
15663 // Only combine loads if both use the unindexed addressing mode.
15664 // PowerPC AltiVec/VMX does not support vector loads or stores with
15665 // pre/post-increment addressing. Indexed modes may imply implicit
15666 // pointer updates, which are not compatible with AltiVec vector
15667 // instructions.
15668 if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
15669 return false;
15670
15671 // Only combine loads if both are non-extending loads
15672 // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
15673 // ISD::SEXTLOAD) perform zero or sign extension, which may change the
15674 // loaded value's semantics and are not compatible with vector loads.
15675 if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
15676 return false;
15677
15678 return true;
15679 };
15680
15681 return (isValidForConvert(LHS) && isValidForConvert(RHS));
15682}
15683
15684static SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N,
15685 const SDLoc &DL) {
15686
15687 assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15688
15689 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15690 assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
15691         "CC must be ISD::SETNE or ISD::SETEQ");
15692
15693 auto getV16i8Load = [&](const SDValue &Operand) {
15694 if (Operand.getOpcode() == ISD::Constant)
15695 return DAG.getBitcast(MVT::v16i8, Operand);
15696
15697 assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15698
15699 auto *LoadNode = cast<LoadSDNode>(Operand);
15700 return DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
15701 LoadNode->getBasePtr(), LoadNode->getMemOperand());
15702 };
15703
15704 // Following code transforms the DAG
15705 // t0: ch,glue = EntryToken
15706 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15707 // t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15708 // undef:i64
15709 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15710 // t5: i128,ch =
15711 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64 t6: i1 =
15712 // setcc t3, t5, setne:ch
15713 //
15714 // ---->
15715 //
15716 // t0: ch,glue = EntryToken
15717 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15718 // t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15719 // undef:i64
15720 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15721 // t5: v16i8,ch =
15722 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15723 // t6: i32 =
15724 // llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15725 // Constant:i32<2>, t3, t5
15726 // t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15727
15728 // Or transforms the DAG
15729 // t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15730 // t8: i1 =
15731 // setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15732 //
15733 // --->
15734 //
15735 // t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15736 // t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15737 // t7: i32 =
15738  //    llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t6, t5
15739
15740 SDValue LHSVec = getV16i8Load(N->getOperand(0));
15741 SDValue RHSVec = getV16i8Load(N->getOperand(1));
15742
15743 SDValue IntrID =
15744 DAG.getConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL, MVT::i32);
15745 SDValue CRSel = DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
15746 SDValue PredResult = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
15747 IntrID, CRSel, LHSVec, RHSVec);
15748 // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15749 // so we need to invert the CC opcode.
15750 return DAG.getSetCC(DL, N->getValueType(0), PredResult,
15751 DAG.getConstant(0, DL, MVT::i32),
15752 CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15753}
15754
15755// Detect whether there is a pattern like (setcc (and X, 1), 0, eq).
15756// If so, return true; otherwise, return false.
15757static bool canConvertSETCCToXori(SDNode *N) {
15758 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15759
15760 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15761 if (CC != ISD::SETEQ)
15762 return false;
15763
15764 SDValue LHS = N->getOperand(0);
15765 SDValue RHS = N->getOperand(1);
15766
15767  // Check whether the `SDValue &V` comes from an `and` with `1`.
15768 auto IsAndWithOne = [](SDValue &V) {
15769 if (V.getOpcode() == ISD::AND) {
15770 for (const SDValue &Op : V->ops())
15771 if (auto *C = dyn_cast<ConstantSDNode>(Op))
15772 if (C->isOne())
15773 return true;
15774 }
15775 return false;
15776 };
15777
15778  // Check whether the SETCC operand is the constant zero.
15779 auto IsCompareWithZero = [](SDValue &V) {
15780 if (auto *C = dyn_cast<ConstantSDNode>(V))
15781 if (C->isZero())
15782 return true;
15783 return false;
15784 };
15785
15786 return (IsAndWithOne(LHS) && IsCompareWithZero(RHS)) ||
15787 (IsAndWithOne(RHS) && IsCompareWithZero(LHS));
15788}
15789
15790// The caller must check that the `SDNode *N` can be converted to an xori using
15791// the function `static bool canConvertSETCCToXori(SDNode *N)`
15792// before calling this function; otherwise, it may produce incorrect results.
15793static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG) {
15794
15795 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15796 SDValue LHS = N->getOperand(0);
15797 SDValue RHS = N->getOperand(1);
15798 SDLoc DL(N);
15799
15800 [[maybe_unused]] ISD::CondCode CC =
15801 cast<CondCodeSDNode>(N->getOperand(2))->get();
15802 assert((CC == ISD::SETEQ) && "CC must be ISD::SETEQ.");
15803 // Rewrite it as XORI (and X, 1), 1.
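  // Since (X & 1) is either 0 or 1, comparing it for equality with 0 is the
  // same as flipping its low bit, i.e. ((X & 1) xor 1) truncated to i1.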
15804 auto MakeXor1 = [&](SDValue V) {
15805 EVT VT = V.getValueType();
15806 SDValue One = DAG.getConstant(1, DL, VT);
15807 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, V, One);
15808 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Xor);
15809 };
15810
15811 if (LHS.getOpcode() == ISD::AND && RHS.getOpcode() != ISD::AND)
15812 return MakeXor1(LHS);
15813
15814 if (RHS.getOpcode() == ISD::AND && LHS.getOpcode() != ISD::AND)
15815 return MakeXor1(RHS);
15816
15817 llvm_unreachable("Should not reach here.");
15818}
15819
15820SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15821 DAGCombinerInfo &DCI) const {
15822 assert(N->getOpcode() == ISD::SETCC &&
15823 "Should be called with a SETCC node");
15824
15825 // Check if the pattern (setcc (and X, 1), 0, eq) is present.
15826 // If it is, rewrite it as XORI (and X, 1), 1.
15827  if (canConvertSETCCToXori(N))
15828 return ConvertSETCCToXori(N, DCI.DAG);
15829
15830 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15831 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15832 SDValue LHS = N->getOperand(0);
15833 SDValue RHS = N->getOperand(1);
15834
15835 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15836 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
15837 LHS.hasOneUse())
15838 std::swap(LHS, RHS);
15839
15840 // x == 0-y --> x+y == 0
15841 // x != 0-y --> x+y != 0
15842 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
15843 RHS.hasOneUse()) {
15844 SDLoc DL(N);
15845 SelectionDAG &DAG = DCI.DAG;
15846 EVT VT = N->getValueType(0);
15847 EVT OpVT = LHS.getValueType();
15848 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
15849 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
15850 }
15851
15852 // Optimization: Fold i128 equality/inequality compares of two loads into a
15853 // vectorized compare using vcmpequb.p when Altivec is available.
15854 //
15855 // Rationale:
15856 // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
15857    // On Altivec-capable subtargets, we can instead reinterpret the i128 loads
15858    // as v16i8 vectors and use the Altivec vcmpequb.p instruction to
15859 // perform a full 128-bit equality check in a single vector compare.
15860 //
15861 // Example Result:
15862 // This transformation replaces memcmp(a, b, 16) with two vector loads
15863 // and one vector compare instruction.
15864
15865 if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
15866 return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N));
15867 }
15868
15869 return DAGCombineTruncBoolExt(N, DCI);
15870}
15871
15872// Is this an extending load from an f32 to an f64?
15873static bool isFPExtLoad(SDValue Op) {
15874 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
15875 return LD->getExtensionType() == ISD::EXTLOAD &&
15876 Op.getValueType() == MVT::f64;
15877 return false;
15878}
15879
15880/// Reduces the number of fp-to-int conversion when building a vector.
15881///
15882/// If this vector is built out of floating to integer conversions,
15883/// transform it to a vector built out of floating point values followed by a
15884/// single floating to integer conversion of the vector.
15885/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15886/// becomes (fptosi (build_vector ($A, $B, ...)))
15887SDValue PPCTargetLowering::
15888combineElementTruncationToVectorTruncation(SDNode *N,
15889 DAGCombinerInfo &DCI) const {
15890 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15891 "Should be called with a BUILD_VECTOR node");
15892
15893 SelectionDAG &DAG = DCI.DAG;
15894 SDLoc dl(N);
15895
15896 SDValue FirstInput = N->getOperand(0);
15897 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15898 "The input operand must be an fp-to-int conversion.");
15899
15900 // This combine happens after legalization so the fp_to_[su]i nodes are
15901  // already converted to PPCISD nodes.
15902 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
15903 if (FirstConversion == PPCISD::FCTIDZ ||
15904 FirstConversion == PPCISD::FCTIDUZ ||
15905 FirstConversion == PPCISD::FCTIWZ ||
15906 FirstConversion == PPCISD::FCTIWUZ) {
15907 bool IsSplat = true;
15908 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15909 FirstConversion == PPCISD::FCTIWUZ;
15910 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15911    SmallVector<SDValue, 4> Ops;
15912 EVT TargetVT = N->getValueType(0);
15913 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15914 SDValue NextOp = N->getOperand(i);
15915 if (NextOp.getOpcode() != PPCISD::MFVSR)
15916 return SDValue();
15917 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15918 if (NextConversion != FirstConversion)
15919 return SDValue();
15920 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15921 // This is not valid if the input was originally double precision. It is
15922 // also not profitable to do unless this is an extending load in which
15923 // case doing this combine will allow us to combine consecutive loads.
15924 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
15925 return SDValue();
15926 if (N->getOperand(i) != FirstInput)
15927 IsSplat = false;
15928 }
15929
15930 // If this is a splat, we leave it as-is since there will be only a single
15931 // fp-to-int conversion followed by a splat of the integer. This is better
15932 // for 32-bit and smaller ints and neutral for 64-bit ints.
15933 if (IsSplat)
15934 return SDValue();
15935
15936 // Now that we know we have the right type of node, get its operands
15937 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15938 SDValue In = N->getOperand(i).getOperand(0);
15939 if (Is32Bit) {
15940 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15941 // here, we know that all inputs are extending loads so this is safe).
15942 if (In.isUndef())
15943 Ops.push_back(DAG.getUNDEF(SrcVT));
15944 else {
15945 SDValue Trunc =
15946 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15947 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15948 Ops.push_back(Trunc);
15949 }
15950 } else
15951 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15952 }
15953
15954 unsigned Opcode;
15955 if (FirstConversion == PPCISD::FCTIDZ ||
15956 FirstConversion == PPCISD::FCTIWZ)
15957 Opcode = ISD::FP_TO_SINT;
15958 else
15959 Opcode = ISD::FP_TO_UINT;
15960
15961 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15962 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15963 return DAG.getNode(Opcode, dl, TargetVT, BV);
15964 }
15965 return SDValue();
15966}
15967
15968// The LXVKQ instruction loads a VSX vector with a special quadword value
15969// based on an immediate value. This helper method returns the details of the
15970// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
15971// to help generate the LXVKQ instruction and the subsequent shift instruction
15972// required to match the original build vector pattern.
15973
15974// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
15975using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
15976
15977static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
15978
15979 // LXVKQ instruction loads the Quadword value:
15980 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15981 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
15982 static const uint32_t Uim = 16;
15983
15984 // Check for direct LXVKQ match (no shift needed)
15985 if (FullVal == BasePattern)
15986 return std::make_tuple(Uim, uint8_t{0});
15987
15988 // Check if FullValue is 1 (the result of the base pattern >> 127)
15989 if (FullVal == APInt(128, 1))
15990 return std::make_tuple(Uim, uint8_t{127});
15991
15992 return std::nullopt;
15993}
15994
15995/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
15996/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
15997/// The LXVKQ instruction loads a VSX vector with a special quadword value based
15998/// on an immediate value. If UIM=0b10000, LXVKQ loads VSR[32×TX+T] with value
15999/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
16000/// This can be used to inline the build vector constants that have the
16001/// following patterns:
16002///
16003/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
16004/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
16005/// The MSB pattern can be loaded directly using LXVKQ, while the LSB pattern is
16006/// loaded using a combination of splat and right-shift instructions.
16007
16008SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
16009 SelectionDAG &DAG) const {
16010
16011 assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
16012 "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
16013
16014  // This transformation is only supported for vectors of byte, halfword, word,
16015  // or doubleword elements.
16016 EVT VT = Op.getValueType();
16017 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
16018 VT == MVT::v2i64))
16019 return SDValue();
16020
16021 LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
16022 << VT.getEVTString() << "): ";
16023 Op->dump());
16024
16025 unsigned NumElems = VT.getVectorNumElements();
16026 unsigned ElemBits = VT.getScalarSizeInBits();
16027
16028 bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
16029
16030  // Check for non-constant operands in the build vector.
16031 for (const SDValue &Operand : Op.getNode()->op_values()) {
16032 if (!isa<ConstantSDNode>(Operand))
16033 return SDValue();
16034 }
16035
16036 // Assemble build vector operands as a 128-bit register value
16037 // We need to reconstruct what the 128-bit register pattern would be
16038 // that produces this vector when interpreted with the current endianness
16039 APInt FullVal = APInt::getZero(128);
16040
16041 for (unsigned Index = 0; Index < NumElems; ++Index) {
16042 auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
16043
16044 // Get element value as raw bits (zero-extended)
16045 uint64_t ElemValue = C->getZExtValue();
16046
16047 // Mask to element size to ensure we only get the relevant bits
16048 if (ElemBits < 64)
16049 ElemValue &= ((1ULL << ElemBits) - 1);
16050
16051 // Calculate bit position for this element in the 128-bit register
16052 unsigned BitPos =
16053 (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
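    // For example, with v4i32 and Index = 1, BitPos is 32 on little endian (the
    // value occupies bits [32, 63]) and 64 on big endian (bits [64, 95]).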
16054
16055 // Create APInt for the element value and shift it to correct position
16056 APInt ElemAPInt(128, ElemValue);
16057 ElemAPInt <<= BitPos;
16058
16059 // Place the element value at the correct bit position
16060 FullVal |= ElemAPInt;
16061 }
16062
16063 if (FullVal.isZero() || FullVal.isAllOnes())
16064 return SDValue();
16065
16066 if (auto UIMOpt = getPatternInfo(FullVal)) {
16067 const auto &[Uim, ShiftAmount] = *UIMOpt;
16068 SDLoc Dl(Op);
16069
16070 // Generate LXVKQ instruction if the shift amount is zero.
16071 if (ShiftAmount == 0) {
16072 SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
16073 SDValue LxvkqInstr =
16074 SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
16075      LLVM_DEBUG(llvm::dbgs()
16076 << "combineBVLoadsSpecialValue: Instruction Emitted ";
16077 LxvkqInstr.dump());
16078 return LxvkqInstr;
16079 }
16080
16081 assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
16082
16083    // The right-shifted pattern can be constructed using a combination of the
16084    // XXSPLTIB and VSRQ instructions. VSRQ uses the shift amount from the lower
16085 // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
16086 // value 255.
16087 SDValue ShiftAmountVec =
16088 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
16089 DAG.getTargetConstant(255, Dl, MVT::i32)),
16090 0);
16091 // Generate appropriate right shift instruction
16092 SDValue ShiftVec = SDValue(
16093 DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
16094 0);
16095    LLVM_DEBUG(llvm::dbgs()
16096 << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
16097 ShiftVec.dump());
16098 return ShiftVec;
16099 }
16100 // No patterns matched for build vectors.
16101 return SDValue();
16102}
16103
16104/// Reduce the number of loads when building a vector.
16105///
16106/// Building a vector out of multiple loads can be converted to a load
16107/// of the vector type if the loads are consecutive. If the loads are
16108/// consecutive but in descending order, a shuffle is added at the end
16109/// to reorder the vector.
16110static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
16111 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16112 "Should be called with a BUILD_VECTOR node");
16113
16114 SDLoc dl(N);
16115
16116  // Return early for non-byte-sized types, as their loads can't be consecutive.
16117 if (!N->getValueType(0).getVectorElementType().isByteSized())
16118 return SDValue();
16119
16120 bool InputsAreConsecutiveLoads = true;
16121 bool InputsAreReverseConsecutive = true;
16122 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
16123 SDValue FirstInput = N->getOperand(0);
16124 bool IsRoundOfExtLoad = false;
16125 LoadSDNode *FirstLoad = nullptr;
16126
16127 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
16128 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
16129 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
16130 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
16131 }
16132 // Not a build vector of (possibly fp_rounded) loads.
16133 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
16134 N->getNumOperands() == 1)
16135 return SDValue();
16136
16137 if (!IsRoundOfExtLoad)
16138 FirstLoad = cast<LoadSDNode>(FirstInput);
16139
16140  SmallVector<LoadSDNode *, 4> InputLoads;
16141 InputLoads.push_back(FirstLoad);
16142 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
16143 // If any inputs are fp_round(extload), they all must be.
16144 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
16145 return SDValue();
16146
16147 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
16148 N->getOperand(i);
16149 if (NextInput.getOpcode() != ISD::LOAD)
16150 return SDValue();
16151
16152 SDValue PreviousInput =
16153 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
16154 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
16155 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
16156
16157 // If any inputs are fp_round(extload), they all must be.
16158 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
16159 return SDValue();
16160
16161 // We only care about regular loads. The PPC-specific load intrinsics
16162 // will not lead to a merge opportunity.
16163 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
16164 InputsAreConsecutiveLoads = false;
16165 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
16166 InputsAreReverseConsecutive = false;
16167
16168 // Exit early if the loads are neither consecutive nor reverse consecutive.
16169 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
16170 return SDValue();
16171 InputLoads.push_back(LD2);
16172 }
16173
16174 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
16175 "The loads cannot be both consecutive and reverse consecutive.");
16176
16177 SDValue WideLoad;
16178 SDValue ReturnSDVal;
16179 if (InputsAreConsecutiveLoads) {
16180 assert(FirstLoad && "Input needs to be a LoadSDNode.");
16181 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
16182 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16183 FirstLoad->getAlign());
16184 ReturnSDVal = WideLoad;
16185 } else if (InputsAreReverseConsecutive) {
16186 LoadSDNode *LastLoad = InputLoads.back();
16187 assert(LastLoad && "Input needs to be a LoadSDNode.");
16188 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
16189 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
16190 LastLoad->getAlign());
16191    SmallVector<int, 16> Ops;
16192 for (int i = N->getNumOperands() - 1; i >= 0; i--)
16193 Ops.push_back(i);
16194
16195 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
16196 DAG.getUNDEF(N->getValueType(0)), Ops);
16197 } else
16198 return SDValue();
16199
16200 for (auto *LD : InputLoads)
16201 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
16202 return ReturnSDVal;
16203}
16204
16205// This function adds the required vector_shuffle needed to get
16206// the elements of the vector extract in the correct position
16207// as specified by the CorrectElems encoding.
16208static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
16209 SDValue Input, uint64_t Elems,
16210 uint64_t CorrectElems) {
16211 SDLoc dl(N);
16212
16213 unsigned NumElems = Input.getValueType().getVectorNumElements();
16214 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16215
16216 // Knowing the element indices being extracted from the original
16217 // vector and the order in which they're being inserted, just put
16218 // them at element indices required for the instruction.
16219 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16220 if (DAG.getDataLayout().isLittleEndian())
16221 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16222 else
16223 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16224 CorrectElems = CorrectElems >> 8;
16225 Elems = Elems >> 8;
16226 }
16227
16228 SDValue Shuffle =
16229 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
16230 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
16231
16232 EVT VT = N->getValueType(0);
16233 SDValue Conv = DAG.getBitcast(VT, Shuffle);
16234
16235 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
16236 Input.getValueType().getVectorElementType(),
16237                               N->getNumOperands());
16238 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
16239 DAG.getValueType(ExtVT));
16240}
16241
16242// Look for build vector patterns where input operands come from sign
16243// extended vector_extract elements of specific indices. If the correct indices
16244// aren't used, add a vector shuffle to fix up the indices and create
16245// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16246// during instruction selection.
16247static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
16248 // This array encodes the indices that the vector sign extend instructions
16249 // extract from when extending from one type to another for both BE and LE.
16250  // The right nibble of each byte corresponds to the LE indices,
16251  // and the left nibble of each byte corresponds to the BE indices.
16252 // For example: 0x3074B8FC byte->word
16253 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16254 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16255 // For example: 0x000070F8 byte->double word
16256 // For LE: the allowed indices are: 0x0,0x8
16257 // For BE: the allowed indices are: 0x7,0xF
16258 uint64_t TargetElems[] = {
16259 0x3074B8FC, // b->w
16260 0x000070F8, // b->d
16261 0x10325476, // h->w
16262 0x00003074, // h->d
16263 0x00001032, // w->d
16264 };
16265
16266 uint64_t Elems = 0;
16267 int Index;
16268 SDValue Input;
16269
16270 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16271 if (!Op)
16272 return false;
16273 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16274 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16275 return false;
16276
16277 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16278 // of the right width.
16279 SDValue Extract = Op.getOperand(0);
16280 if (Extract.getOpcode() == ISD::ANY_EXTEND)
16281 Extract = Extract.getOperand(0);
16282 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16283 return false;
16284
16285    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
16286 if (!ExtOp)
16287 return false;
16288
16289 Index = ExtOp->getZExtValue();
16290 if (Input && Input != Extract.getOperand(0))
16291 return false;
16292
16293 if (!Input)
16294 Input = Extract.getOperand(0);
16295
16296 Elems = Elems << 8;
16297 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
16298 Elems |= Index;
16299
16300 return true;
16301 };
16302
16303  // If the build vector operands aren't sign extended vector extracts
16304  // of the same input vector, then return.
16305 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16306 if (!isSExtOfVecExtract(N->getOperand(i))) {
16307 return SDValue();
16308 }
16309 }
16310
16311 // If the vector extract indices are not correct, add the appropriate
16312 // vector_shuffle.
16313 int TgtElemArrayIdx;
16314 int InputSize = Input.getValueType().getScalarSizeInBits();
16315 int OutputSize = N->getValueType(0).getScalarSizeInBits();
16316 if (InputSize + OutputSize == 40)
16317 TgtElemArrayIdx = 0;
16318 else if (InputSize + OutputSize == 72)
16319 TgtElemArrayIdx = 1;
16320 else if (InputSize + OutputSize == 48)
16321 TgtElemArrayIdx = 2;
16322 else if (InputSize + OutputSize == 80)
16323 TgtElemArrayIdx = 3;
16324 else if (InputSize + OutputSize == 96)
16325 TgtElemArrayIdx = 4;
16326 else
16327 return SDValue();
16328
16329 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16330 CorrectElems = DAG.getDataLayout().isLittleEndian()
16331 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
16332 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
16333 if (Elems != CorrectElems) {
16334 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16335 }
16336
16337 // Regular lowering will catch cases where a shuffle is not needed.
16338 return SDValue();
16339}
16340
16341// Look for the pattern of a load from a narrow width to i128, feeding
16342// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16343// (LXVRZX). This node represents a zero extending load that will be matched
16344// to the Load VSX Vector Rightmost instructions.
16345static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
16346 SDLoc DL(N);
16347
16348 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16349 if (N->getValueType(0) != MVT::v1i128)
16350 return SDValue();
16351
16352 SDValue Operand = N->getOperand(0);
16353 // Proceed with the transformation if the operand to the BUILD_VECTOR
16354 // is a load instruction.
16355 if (Operand.getOpcode() != ISD::LOAD)
16356 return SDValue();
16357
16358 auto *LD = cast<LoadSDNode>(Operand);
16359 EVT MemoryType = LD->getMemoryVT();
16360
16361  // This transformation is only valid if we are loading either a byte,
16362 // halfword, word, or doubleword.
16363 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16364 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16365
16366 // Ensure that the load from the narrow width is being zero extended to i128.
16367 if (!ValidLDType ||
16368 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16369 LD->getExtensionType() != ISD::EXTLOAD))
16370 return SDValue();
16371
16372 SDValue LoadOps[] = {
16373 LD->getChain(), LD->getBasePtr(),
16374 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
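  // The third operand carries the width in bits of the zero-extended memory
  // access, used when matching the Load VSX Vector Rightmost instructions.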
16375
16376 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
16377 DAG.getVTList(MVT::v1i128, MVT::Other),
16378 LoadOps, MemoryType, LD->getMemOperand());
16379}
16380
16381SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16382 DAGCombinerInfo &DCI) const {
16383 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16384 "Should be called with a BUILD_VECTOR node");
16385
16386 SelectionDAG &DAG = DCI.DAG;
16387 SDLoc dl(N);
16388
16389 if (!Subtarget.hasVSX())
16390 return SDValue();
16391
16392 // The target independent DAG combiner will leave a build_vector of
16393 // float-to-int conversions intact. We can generate MUCH better code for
16394 // a float-to-int conversion of a vector of floats.
16395 SDValue FirstInput = N->getOperand(0);
16396 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16397 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16398 if (Reduced)
16399 return Reduced;
16400 }
16401
16402 // If we're building a vector out of consecutive loads, just load that
16403 // vector type.
16404 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16405 if (Reduced)
16406 return Reduced;
16407
16408 // If we're building a vector out of extended elements from another vector
16409 // we have P9 vector integer extend instructions. The code assumes legal
16410 // input types (i.e. it can't handle things like v4i16) so do not run before
16411 // legalization.
16412 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16413 Reduced = combineBVOfVecSExt(N, DAG);
16414 if (Reduced)
16415 return Reduced;
16416 }
16417
16418 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
16419 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16420 // is a load from <valid narrow width> to i128.
16421 if (Subtarget.isISA3_1()) {
16422 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16423 if (BVOfZLoad)
16424 return BVOfZLoad;
16425 }
16426
16427 if (N->getValueType(0) != MVT::v2f64)
16428 return SDValue();
16429
16430 // Looking for:
16431 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
16432 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16433 FirstInput.getOpcode() != ISD::UINT_TO_FP)
16434 return SDValue();
16435 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
16436 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
16437 return SDValue();
16438 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
16439 return SDValue();
16440
16441 SDValue Ext1 = FirstInput.getOperand(0);
16442 SDValue Ext2 = N->getOperand(1).getOperand(0);
16443 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16444     Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16445 return SDValue();
16446
16447 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16448 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16449 if (!Ext1Op || !Ext2Op)
16450 return SDValue();
16451 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16452 Ext1.getOperand(0) != Ext2.getOperand(0))
16453 return SDValue();
16454
16455 int FirstElem = Ext1Op->getZExtValue();
16456 int SecondElem = Ext2Op->getZExtValue();
16457 int SubvecIdx;
16458 if (FirstElem == 0 && SecondElem == 1)
16459 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16460 else if (FirstElem == 2 && SecondElem == 3)
16461 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16462 else
16463 return SDValue();
16464
16465 SDValue SrcVec = Ext1.getOperand(0);
16466 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16467 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16468 return DAG.getNode(NodeType, dl, MVT::v2f64,
16469 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16470}
16471
16472SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
16473 DAGCombinerInfo &DCI) const {
16474 assert((N->getOpcode() == ISD::SINT_TO_FP ||
16475 N->getOpcode() == ISD::UINT_TO_FP) &&
16476 "Need an int -> FP conversion node here");
16477
16478 if (useSoftFloat() || !Subtarget.has64BitSupport())
16479 return SDValue();
16480
16481 SelectionDAG &DAG = DCI.DAG;
16482 SDLoc dl(N);
16483 SDValue Op(N, 0);
16484
16485 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
16486 // from the hardware.
16487 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
16488 return SDValue();
16489 if (!Op.getOperand(0).getValueType().isSimple())
16490 return SDValue();
16491 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
16492 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
16493 return SDValue();
16494
16495 SDValue FirstOperand(Op.getOperand(0));
16496 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
16497 (FirstOperand.getValueType() == MVT::i8 ||
16498 FirstOperand.getValueType() == MVT::i16);
16499 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
16500 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
16501 bool DstDouble = Op.getValueType() == MVT::f64;
16502 unsigned ConvOp = Signed ?
16503 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
16504 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
16505 SDValue WidthConst =
16506 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
16507 dl, false);
16508 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
16509 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
16510 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
16511 DAG.getVTList(MVT::f64, MVT::Other),
16512 Ops, MVT::i8, LDN->getMemOperand());
16513 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
16514
16515 // For signed conversion, we need to sign-extend the value in the VSR
16516 if (Signed) {
16517 SDValue ExtOps[] = { Ld, WidthConst };
16518 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
16519 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
16520 } else
16521 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
16522 }
16523
16524
16525 // For i32 intermediate values, unfortunately, the conversion functions
16526  // leave the upper 32 bits of the value undefined. Within the set of
16527 // scalar instructions, we have no method for zero- or sign-extending the
16528 // value. Thus, we cannot handle i32 intermediate values here.
16529 if (Op.getOperand(0).getValueType() == MVT::i32)
16530 return SDValue();
16531
16532 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
16533 "UINT_TO_FP is supported only with FPCVT");
16534
16535 // If we have FCFIDS, then use it when converting to single-precision.
16536 // Otherwise, convert to double-precision and then round.
16537 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16538 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
16539 : PPCISD::FCFIDS)
16540 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
16541 : PPCISD::FCFID);
16542 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16543 ? MVT::f32
16544 : MVT::f64;
16545
16546  // If we're converting from a float to an int, and back to a float again,
16547 // then we don't need the store/load pair at all.
16548 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
16549 Subtarget.hasFPCVT()) ||
16550 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
16551 SDValue Src = Op.getOperand(0).getOperand(0);
16552 if (Src.getValueType() == MVT::f32) {
16553 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
16554 DCI.AddToWorklist(Src.getNode());
16555 } else if (Src.getValueType() != MVT::f64) {
16556 // Make sure that we don't pick up a ppc_fp128 source value.
16557 return SDValue();
16558 }
16559
16560 unsigned FCTOp =
16561 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
16562 PPCISD::FCTIDUZ;
16563
16564 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
16565 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
16566
16567 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
16568 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
16569 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
16570 DCI.AddToWorklist(FP.getNode());
16571 }
16572
16573 return FP;
16574 }
16575
16576 return SDValue();
16577}
16578
16579// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16580// builtins) into loads with swaps.
16581SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
16582 DAGCombinerInfo &DCI) const {
16583 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16584 // load combines.
16585 if (DCI.isBeforeLegalizeOps())
16586 return SDValue();
16587
16588 SelectionDAG &DAG = DCI.DAG;
16589 SDLoc dl(N);
16590 SDValue Chain;
16591 SDValue Base;
16592 MachineMemOperand *MMO;
16593
16594 switch (N->getOpcode()) {
16595 default:
16596 llvm_unreachable("Unexpected opcode for little endian VSX load");
16597 case ISD::LOAD: {
16598    LoadSDNode *LD = cast<LoadSDNode>(N);
16599 Chain = LD->getChain();
16600 Base = LD->getBasePtr();
16601 MMO = LD->getMemOperand();
16602 // If the MMO suggests this isn't a load of a full vector, leave
16603 // things alone. For a built-in, we have to make the change for
16604 // correctness, so if there is a size problem that will be a bug.
16605 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16606 return SDValue();
16607 break;
16608 }
16609  case ISD::INTRINSIC_W_CHAIN: {
16610    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16611 Chain = Intrin->getChain();
16612 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16613 // us what we want. Get operand 2 instead.
16614 Base = Intrin->getOperand(2);
16615 MMO = Intrin->getMemOperand();
16616 break;
16617 }
16618 }
16619
16620 MVT VecTy = N->getValueType(0).getSimpleVT();
16621
16622 SDValue LoadOps[] = { Chain, Base };
16623 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16624 DAG.getVTList(MVT::v2f64, MVT::Other),
16625 LoadOps, MVT::v2f64, MMO);
16626
16627 DCI.AddToWorklist(Load.getNode());
16628 Chain = Load.getValue(1);
16629 SDValue Swap = DAG.getNode(
16630 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16631 DCI.AddToWorklist(Swap.getNode());
16632
16633 // Add a bitcast if the resulting load type doesn't match v2f64.
16634 if (VecTy != MVT::v2f64) {
16635 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16636 DCI.AddToWorklist(N.getNode());
16637 // Package {bitcast value, swap's chain} to match Load's shape.
16638 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16639 N, Swap.getValue(1));
16640 }
16641
16642 return Swap;
16643}
16644
16645// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16646// builtins) into stores with swaps.
16647SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
16648 DAGCombinerInfo &DCI) const {
16649 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16650 // store combines.
16651 if (DCI.isBeforeLegalizeOps())
16652 return SDValue();
16653
16654 SelectionDAG &DAG = DCI.DAG;
16655 SDLoc dl(N);
16656 SDValue Chain;
16657 SDValue Base;
16658 unsigned SrcOpnd;
16659 MachineMemOperand *MMO;
16660
16661 switch (N->getOpcode()) {
16662 default:
16663 llvm_unreachable("Unexpected opcode for little endian VSX store");
16664 case ISD::STORE: {
16665    StoreSDNode *ST = cast<StoreSDNode>(N);
16666 Chain = ST->getChain();
16667 Base = ST->getBasePtr();
16668 MMO = ST->getMemOperand();
16669 SrcOpnd = 1;
16670 // If the MMO suggests this isn't a store of a full vector, leave
16671 // things alone. For a built-in, we have to make the change for
16672 // correctness, so if there is a size problem that will be a bug.
16673 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16674 return SDValue();
16675 break;
16676 }
16677 case ISD::INTRINSIC_VOID: {
16678    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16679 Chain = Intrin->getChain();
16680 // Intrin->getBasePtr() oddly does not get what we want.
16681 Base = Intrin->getOperand(3);
16682 MMO = Intrin->getMemOperand();
16683 SrcOpnd = 2;
16684 break;
16685 }
16686 }
16687
16688 SDValue Src = N->getOperand(SrcOpnd);
16689 MVT VecTy = Src.getValueType().getSimpleVT();
16690
16691  // All stores are done as v2f64 with a possible bitcast.
16692 if (VecTy != MVT::v2f64) {
16693 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16694 DCI.AddToWorklist(Src.getNode());
16695 }
16696
16697 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16698 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16699 DCI.AddToWorklist(Swap.getNode());
16700 Chain = Swap.getValue(1);
16701 SDValue StoreOps[] = { Chain, Swap, Base };
16702 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16703 DAG.getVTList(MVT::Other),
16704 StoreOps, VecTy, MMO);
16705 DCI.AddToWorklist(Store.getNode());
16706 return Store;
16707}
16708
16709// Handle DAG combine for STORE (FP_TO_INT F).
16710SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16711 DAGCombinerInfo &DCI) const {
16712 SelectionDAG &DAG = DCI.DAG;
16713 SDLoc dl(N);
16714 unsigned Opcode = N->getOperand(1).getOpcode();
16715 (void)Opcode;
16716 bool Strict = N->getOperand(1)->isStrictFPOpcode();
16717
16718 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16719 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16720 && "Not a FP_TO_INT Instruction!");
16721
16722 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
16723 EVT Op1VT = N->getOperand(1).getValueType();
16724 EVT ResVT = Val.getValueType();
16725
16726 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
16727 return SDValue();
16728
16729 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
16730 bool ValidTypeForStoreFltAsInt =
16731 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16732 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16733
16734 // TODO: Lower conversion from f128 on all VSX targets
16735 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16736 return SDValue();
16737
16738 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16739 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16740 return SDValue();
16741
16742 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
16743
16744 // Set number of bytes being converted.
16745 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
16746 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
16747 DAG.getIntPtrConstant(ByteSize, dl, false),
16748 DAG.getValueType(Op1VT)};
16749
16750 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
16751 DAG.getVTList(MVT::Other), Ops,
16752 cast<StoreSDNode>(N)->getMemoryVT(),
16753 cast<StoreSDNode>(N)->getMemOperand());
16754
16755 return Val;
16756}
16757
16758static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16759 // Check that the source of the element keeps flipping
16760  // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
16761 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16762 for (int i = 1, e = Mask.size(); i < e; i++) {
16763 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16764 return false;
16765 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16766 return false;
16767 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16768 }
16769 return true;
16770}
16771
16772static bool isSplatBV(SDValue Op) {
16773 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16774 return false;
16775 SDValue FirstOp;
16776
16777 // Find first non-undef input.
16778 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16779 FirstOp = Op.getOperand(i);
16780 if (!FirstOp.isUndef())
16781 break;
16782 }
16783
16784 // All inputs are undef or the same as the first non-undef input.
16785 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16786 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16787 return false;
16788 return true;
16789}
16790
16791static SDValue isScalarToVec(SDValue Op) {
16792 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16793 return Op;
16794 if (Op.getOpcode() != ISD::BITCAST)
16795 return SDValue();
16796 Op = Op.getOperand(0);
16797 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16798 return Op;
16799 return SDValue();
16800}
16801
16802// Fix up the shuffle mask to account for the fact that the result of
16803// scalar_to_vector is not in lane zero. This just takes all values in
16804// the ranges specified by the min/max indices and adds the number of
16805// elements required to ensure each element comes from the respective
16806// position in the valid lane.
16807// On little endian, that's just the corresponding element in the other
16808// half of the vector. On big endian, it is in the same half but right
16809// justified rather than left justified in that half.
16810static void fixupShuffleMaskForPermutedSToV(
16811 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16812 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16813 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16814 int LHSEltFixup =
16815 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16816 int RHSEltFixup =
16817 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16818 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16819 int Idx = ShuffV[I];
16820 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16821 ShuffV[I] += LHSEltFixup;
16822 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16823 ShuffV[I] += RHSEltFixup;
16824 }
16825}
16826
16827// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16828// the original is:
16829// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16830// In such a case, just change the shuffle mask to extract the element
16831// from the permuted index.
16832static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
16833 const PPCSubtarget &Subtarget) {
16834 SDLoc dl(OrigSToV);
16835 EVT VT = OrigSToV.getValueType();
16836 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16837 "Expecting a SCALAR_TO_VECTOR here");
16838 SDValue Input = OrigSToV.getOperand(0);
16839
16840 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16841 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
16842 SDValue OrigVector = Input.getOperand(0);
16843
16844 // Can't handle non-const element indices or different vector types
16845 // for the input to the extract and the output of the scalar_to_vector.
16846 if (Idx && VT == OrigVector.getValueType()) {
16847 unsigned NumElts = VT.getVectorNumElements();
16848 assert(
16849 NumElts > 1 &&
16850 "Cannot produce a permuted scalar_to_vector for one element vector");
16851 SmallVector<int, 16> NewMask(NumElts, -1);
16852 unsigned ResultInElt = NumElts / 2;
16853 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
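      // For example, with a v4i32 result the extracted value lands in element 2
      // on little endian and element 1 on big endian.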
16854 NewMask[ResultInElt] = Idx->getZExtValue();
16855 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
16856 }
16857 }
16858 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
16859 OrigSToV.getOperand(0));
16860}
16861
16862static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
16863 int HalfVec, int LHSLastElementDefined,
16864 int RHSLastElementDefined) {
16865 for (int Index : ShuffV) {
16866 if (Index < 0) // Skip explicitly undefined mask indices.
16867 continue;
16868 // Handle first input vector of the vector_shuffle.
16869 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16870 (Index > LHSLastElementDefined))
16871 return false;
16872 // Handle second input vector of the vector_shuffle.
16873 if ((RHSLastElementDefined >= 0) &&
16874 (Index > HalfVec + RHSLastElementDefined))
16875 return false;
16876 }
16877 return true;
16878}
16879
16880static SDValue generateSToVPermutedForVecShuffle(
16881 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16882 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16883 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16884 EVT VecShuffOperandType = VecShuffOperand.getValueType();
16885 // Set up the values for the shuffle vector fixup.
16886 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16887 // The last element depends on if the input comes from the LHS or RHS.
16888 //
16889 // For example:
16890 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16891 //
16892 // For the LHS: The last element that comes from the LHS is actually 0, not 3
16893 // because elements 1 and higher of a scalar_to_vector are undefined.
16894 // For the RHS: The last element that comes from the RHS is actually 5, not 7
16895 // because elements 1 and higher of a scalar_to_vector are undefined.
16896 // It is also not 4 because the original scalar_to_vector is wider and
16897 // actually contains two i32 elements.
16898 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16899 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
16900 : FirstElt;
16901 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
16902 if (SToVPermuted.getValueType() != VecShuffOperandType)
16903 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
16904 return SToVPermuted;
16905}
16906
16907// On little endian subtargets, combine shuffles such as:
16908// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16909// into:
16910// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16911// because the latter can be matched to a single instruction merge.
16912// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16913// to put the value into element zero. Adjust the shuffle mask so that the
16914// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16915// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16916// nodes with elements smaller than doubleword because all the ways
16917// of getting scalar data into a vector register put the value in the
16918// rightmost element of the left half of the vector.
16919SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16920 SelectionDAG &DAG) const {
16921 SDValue LHS = SVN->getOperand(0);
16922 SDValue RHS = SVN->getOperand(1);
16923 auto Mask = SVN->getMask();
16924 int NumElts = LHS.getValueType().getVectorNumElements();
16925 SDValue Res(SVN, 0);
16926 SDLoc dl(SVN);
16927 bool IsLittleEndian = Subtarget.isLittleEndian();
16928
16929 // On big endian targets this is only useful for subtargets with direct moves.
16930 // On little endian targets it would be useful for all subtargets with VSX.
16931 // However adding special handling for LE subtargets without direct moves
16932 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
16933 // which includes direct moves.
16934 if (!Subtarget.hasDirectMove())
16935 return Res;
16936
16937 // If this is not a shuffle of a shuffle and the first element comes from
16938 // the second vector, canonicalize to the commuted form. This will make it
16939 // more likely to match one of the single instruction patterns.
16940 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
16941 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
16942 std::swap(LHS, RHS);
16943 Res = DAG.getCommutedVectorShuffle(*SVN);
16944
16945 if (!isa<ShuffleVectorSDNode>(Res))
16946 return Res;
16947
16948 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16949 }
16950
16951 // Adjust the shuffle mask if either input vector comes from a
16952 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
16953 // form (to prevent the need for a swap).
16954 SmallVector<int, 16> ShuffV(Mask);
16955 SDValue SToVLHS = isScalarToVec(LHS);
16956 SDValue SToVRHS = isScalarToVec(RHS);
16957 if (SToVLHS || SToVRHS) {
16958 EVT VT = SVN->getValueType(0);
16959 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
16960 int ShuffleNumElts = ShuffV.size();
16961 int HalfVec = ShuffleNumElts / 2;
16962 // The width of the "valid lane" (i.e. the lane that contains the value that
16963 // is vectorized) needs to be expressed in terms of the number of elements
16964 // of the shuffle. It is thereby the ratio of the values before and after
16965 // any bitcast, which will be set later on if the LHS or RHS are
16966 // SCALAR_TO_VECTOR nodes.
16967 unsigned LHSNumValidElts = HalfVec;
16968 unsigned RHSNumValidElts = HalfVec;
16969
16970 // Initially assume that neither input is permuted. These will be adjusted
16971    // accordingly if either input is. Note that -1 means that all elements
16972 // are undefined.
16973 int LHSFirstElt = 0;
16974 int RHSFirstElt = ShuffleNumElts;
16975 int LHSLastElt = -1;
16976 int RHSLastElt = -1;
16977
16978 // Get the permuted scalar to vector nodes for the source(s) that come from
16979 // ISD::SCALAR_TO_VECTOR.
16980 // On big endian systems, this only makes sense for element sizes smaller
16981 // than 64 bits since for 64-bit elements, all instructions already put
16982 // the value into element zero. Since scalar size of LHS and RHS may differ
16983 // after isScalarToVec, this should be checked using their own sizes.
16984 int LHSScalarSize = 0;
16985 int RHSScalarSize = 0;
16986 if (SToVLHS) {
16987 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
16988 if (!IsLittleEndian && LHSScalarSize >= 64)
16989 return Res;
16990 }
16991 if (SToVRHS) {
16992 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
16993 if (!IsLittleEndian && RHSScalarSize >= 64)
16994 return Res;
16995 }
16996 if (LHSScalarSize != 0)
16998 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
16999 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
17000 if (RHSScalarSize != 0)
17002 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
17003 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
17004
17005 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
17006 return Res;
17007
17008 // Fix up the shuffle mask to reflect where the desired element actually is.
17009 // The minimum and maximum indices that correspond to element zero for both
17010 // the LHS and RHS are computed and will control which shuffle mask entries
17011 // are to be changed. For example, if the RHS is permuted, any shuffle mask
17012 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
17014 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
17015 LHSNumValidElts, RHSNumValidElts, Subtarget);
17016 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17017
17018 // We may have simplified away the shuffle. We won't be able to do anything
17019 // further with it here.
17020 if (!isa<ShuffleVectorSDNode>(Res))
17021 return Res;
17022 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
17023 }
17024
17025 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
17026 // The common case after we commuted the shuffle is that the RHS is a splat
17027 // and we have elements coming in from the splat at indices that are not
17028 // conducive to using a merge.
17029 // Example:
17030 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
17031 if (!isSplatBV(TheSplat))
17032 return Res;
17033
17034 // We are looking for a mask such that all even elements are from
17035 // one vector and all odd elements from the other.
17036 if (!isAlternatingShuffMask(Mask, NumElts))
17037 return Res;
17038
17039 // Adjust the mask so we are pulling in the same index from the splat
17040 // as the index from the interesting vector in consecutive elements.
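// For instance, with the splat on the RHS and the mask <0,17,1,19,2,21,...>,
// entry 17 becomes 16, 19 becomes 17, 21 becomes 18, and so on, giving
// <0,16,1,17,2,18,...>; because every element of a splat is identical this
// is a semantic no-op, but the new mask now matches a vector merge pattern.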
17041 if (IsLittleEndian) {
17042 // Example (even elements from first vector):
17043 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
17044 if (Mask[0] < NumElts)
17045 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17046 if (ShuffV[i] < 0)
17047 continue;
17048 // If element from non-splat is undef, pick first element from splat.
17049 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
17050 }
17051 // Example (odd elements from first vector):
17052 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
17053 else
17054 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17055 if (ShuffV[i] < 0)
17056 continue;
17057 // If element from non-splat is undef, pick first element from splat.
17058 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
17059 }
17060 } else {
17061 // Example (even elements from first vector):
17062 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
17063 if (Mask[0] < NumElts)
17064 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17065 if (ShuffV[i] < 0)
17066 continue;
17067 // If element from non-splat is undef, pick first element from splat.
17068 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
17069 }
17070 // Example (odd elements from first vector):
17071 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
17072 else
17073 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17074 if (ShuffV[i] < 0)
17075 continue;
17076 // If element from non-splat is undef, pick first element from splat.
17077 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
17078 }
17079 }
17080
17081 // If the RHS has undefs, we need to remove them since we may have created
17082 // a shuffle that adds those instead of the splat value.
17083 SDValue SplatVal =
17084 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
17085 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
17086
17087 if (IsLittleEndian)
17088 RHS = TheSplat;
17089 else
17090 LHS = TheSplat;
17091 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17092}
17093
17094SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
17095 LSBaseSDNode *LSBase,
17096 DAGCombinerInfo &DCI) const {
17097 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
17098 "Not a reverse memop pattern!");
17099
17100 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
17101 auto Mask = SVN->getMask();
17102 int i = 0;
17103 auto I = Mask.rbegin();
17104 auto E = Mask.rend();
17105
17106 for (; I != E; ++I) {
17107 if (*I != i)
17108 return false;
17109 i++;
17110 }
17111 return true;
17112 };
17113
17114 SelectionDAG &DAG = DCI.DAG;
17115 EVT VT = SVN->getValueType(0);
17116
17117 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
17118 return SDValue();
17119
17120 // Before P9, we rely on the PPCVSXSwapRemoval pass to fix up the element
17121 // order; see the comment in PPCVSXSwapRemoval.cpp. This combine would
17122 // conflict with that optimization, so we don't do it for pre-P9 subtargets.
17123 if (!Subtarget.hasP9Vector())
17124 return SDValue();
17125
17126 if (!IsElementReverse(SVN))
17127 return SDValue();
17128
17129 if (LSBase->getOpcode() == ISD::LOAD) {
17130 // If result 0 of the load has any user other than the
17131 // shufflevector instruction, it is not profitable to replace the
17132 // shufflevector with a reverse load.
17133 for (SDUse &Use : LSBase->uses())
17134 if (Use.getResNo() == 0 &&
17135 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
17136 return SDValue();
17137
17138 SDLoc dl(LSBase);
17139 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
17140 return DAG.getMemIntrinsicNode(
17141 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
17142 LSBase->getMemoryVT(), LSBase->getMemOperand());
17143 }
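// For example, a <3,2,1,0> shuffle of a normal v4i32 load on little-endian
// Power9 becomes a single LOAD_VEC_BE (selectable as something like lxvw4x):
// loading in big-endian element order on an LE target is exactly an element
// reversal, so the separate permute disappears.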
17144
17145 if (LSBase->getOpcode() == ISD::STORE) {
17146 // If there are other uses of the shuffle, the swap cannot be avoided.
17147 // Forcing the use of an X-Form (since swapped stores only have
17148 // X-Forms) without removing the swap is unprofitable.
17149 if (!SVN->hasOneUse())
17150 return SDValue();
17151
17152 SDLoc dl(LSBase);
17153 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
17154 LSBase->getBasePtr()};
17155 return DAG.getMemIntrinsicNode(
17156 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
17157 LSBase->getMemoryVT(), LSBase->getMemOperand());
17158 }
17159
17160 llvm_unreachable("Expected a load or store node here");
17161}
17162
17163static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
17164 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
17165 if (IntrinsicID == Intrinsic::ppc_stdcx)
17166 StoreWidth = 8;
17167 else if (IntrinsicID == Intrinsic::ppc_stwcx)
17168 StoreWidth = 4;
17169 else if (IntrinsicID == Intrinsic::ppc_sthcx)
17170 StoreWidth = 2;
17171 else if (IntrinsicID == Intrinsic::ppc_stbcx)
17172 StoreWidth = 1;
17173 else
17174 return false;
17175 return true;
17176}
17177
17178static SDValue DAGCombineAddc(SDNode *N,
17179                              PPCTargetLowering::DAGCombinerInfo &DCI) {
17180 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
17181 // (ADDC (ADDE 0, 0, C), -1) -> C
17182 SDValue LHS = N->getOperand(0);
17183 SDValue RHS = N->getOperand(1);
17184 if (LHS->getOpcode() == PPCISD::ADDE &&
17185 isNullConstant(LHS->getOperand(0)) &&
17186 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
17187 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
17188 }
17189 }
17190 return SDValue();
17191}
17192
17193SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
17194 DAGCombinerInfo &DCI) const {
17195 SelectionDAG &DAG = DCI.DAG;
17196 SDLoc dl(N);
17197 switch (N->getOpcode()) {
17198 default: break;
17199 case ISD::ADD:
17200 return combineADD(N, DCI);
17201 case ISD::AND: {
17202 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17203 // original input as that will prevent us from selecting optimal rotates.
17204 // This only matters if the input to the extend is i32 widened to i64.
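// For example, (and (zext (srl X:i32, 8)):i64, 255) is rewritten below as
// (zext (and (srl X, 8), 255):i32), which can then be selected as a single
// rlwinm rather than a rotate followed by a separate 64-bit mask.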
17205 SDValue Op1 = N->getOperand(0);
17206 SDValue Op2 = N->getOperand(1);
17207 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17208 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17209 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
17210 Op1.getOperand(0).getValueType() != MVT::i32)
17211 break;
17212 SDValue NarrowOp = Op1.getOperand(0);
17213 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17214 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17215 break;
17216
17217 uint64_t Imm = Op2->getAsZExtVal();
17218 // Make sure that the constant is narrow enough to fit in the narrow type.
17219 if (!isUInt<32>(Imm))
17220 break;
17221 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
17222 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
17223 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
17224 }
17225 case ISD::SHL:
17226 return combineSHL(N, DCI);
17227 case ISD::SRA:
17228 return combineSRA(N, DCI);
17229 case ISD::SRL:
17230 return combineSRL(N, DCI);
17231 case ISD::MUL:
17232 return combineMUL(N, DCI);
17233 case ISD::FMA:
17234 case PPCISD::FNMSUB:
17235 return combineFMALike(N, DCI);
17236 case PPCISD::SHL:
17237 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
17238 return N->getOperand(0);
17239 break;
17240 case PPCISD::SRL:
17241 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
17242 return N->getOperand(0);
17243 break;
17244 case PPCISD::SRA:
17245 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
17246 if (C->isZero() || // 0 >>s V -> 0.
17247 C->isAllOnes()) // -1 >>s V -> -1.
17248 return N->getOperand(0);
17249 }
17250 break;
17251 case ISD::SIGN_EXTEND:
17252 case ISD::ZERO_EXTEND:
17253 case ISD::ANY_EXTEND:
17254 return DAGCombineExtBoolTrunc(N, DCI);
17255 case ISD::TRUNCATE:
17256 return combineTRUNCATE(N, DCI);
17257 case ISD::SETCC:
17258 if (SDValue CSCC = combineSetCC(N, DCI))
17259 return CSCC;
17260 [[fallthrough]];
17261 case ISD::SELECT_CC:
17262 return DAGCombineTruncBoolExt(N, DCI);
17263 case ISD::SINT_TO_FP:
17264 case ISD::UINT_TO_FP:
17265 return combineFPToIntToFP(N, DCI);
17267 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
17268 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
17269 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
17270 }
17271 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
17272 case ISD::STORE: {
17273
17274 EVT Op1VT = N->getOperand(1).getValueType();
17275 unsigned Opcode = N->getOperand(1).getOpcode();
17276
17277 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17278 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17279 SDValue Val = combineStoreFPToInt(N, DCI);
17280 if (Val)
17281 return Val;
17282 }
17283
17284 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17285 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
17286 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
17287 if (Val)
17288 return Val;
17289 }
17290
17291 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
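// For example, (store (bswap X:i32), ptr) can be emitted as a single
// stwbrx X, 0, ptr; the store writes the word byte-reversed, making the
// explicit bswap node unnecessary.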
17292 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
17293 N->getOperand(1).getNode()->hasOneUse() &&
17294 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17295 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17296
17297 // STBRX can only handle simple types, and it makes no sense to store fewer
17298 // than two bytes in byte-reversed order.
17299 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
17300 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17301 break;
17302
17303 SDValue BSwapOp = N->getOperand(1).getOperand(0);
17304 // Do an any-extend to 32-bits if this is a half-word input.
17305 if (BSwapOp.getValueType() == MVT::i16)
17306 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
17307
17308 // If the type of the BSWAP operand is wider than the stored memory width,
17309 // it needs to be shifted right before the STBRX.
17310 if (Op1VT.bitsGT(mVT)) {
17311 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17312 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
17313 DAG.getConstant(Shift, dl, MVT::i32));
17314 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17315 if (Op1VT == MVT::i64)
17316 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
17317 }
17318
17319 SDValue Ops[] = {
17320 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
17321 };
17322 return
17323 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
17324 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
17325 cast<StoreSDNode>(N)->getMemOperand());
17326 }
17327
17328 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17329 // So it can increase the chance of CSE constant construction.
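// For example, stores of the i32 constant 0 and uses of the i64 constant 0
// can then share a single materialization of zero rather than building
// separate 32-bit and 64-bit constants.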
17330 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17331 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
17332 // Need to sign-extend to 64 bits to handle negative values.
17333 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
17334 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
17335 MemVT.getSizeInBits());
17336 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
17337
17338 auto *ST = cast<StoreSDNode>(N);
17339 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
17340 ST->getBasePtr(), ST->getOffset(), MemVT,
17341 ST->getMemOperand(), ST->getAddressingMode(),
17342 /*IsTruncating=*/true);
17343 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17344 // new store which will change the constant by removing non-demanded bits.
17345 return ST->isUnindexed()
17346 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
17347 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
17348 }
17349
17350 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17351 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17352 if (Op1VT.isSimple()) {
17353 MVT StoreVT = Op1VT.getSimpleVT();
17354 if (Subtarget.needsSwapsForVSXMemOps() &&
17355 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17356 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17357 return expandVSXStoreForLE(N, DCI);
17358 }
17359 break;
17360 }
17361 case ISD::LOAD: {
17362 LoadSDNode *LD = cast<LoadSDNode>(N);
17363 EVT VT = LD->getValueType(0);
17364
17365 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17366 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17367 if (VT.isSimple()) {
17368 MVT LoadVT = VT.getSimpleVT();
17369 if (Subtarget.needsSwapsForVSXMemOps() &&
17370 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17371 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17372 return expandVSXLoadForLE(N, DCI);
17373 }
17374
17375 // We sometimes end up with a 64-bit integer load, from which we extract
17376 // two single-precision floating-point numbers. This happens with
17377 // std::complex<float>, and other similar structures, because of the way we
17378 // canonicalize structure copies. However, if we lack direct moves,
17379 // then the final bitcasts from the extracted integer values to the
17380 // floating-point numbers turn into store/load pairs. Even with direct moves,
17381 // just loading the two floating-point numbers is likely better.
17382 auto ReplaceTwoFloatLoad = [&]() {
17383 if (VT != MVT::i64)
17384 return false;
17385
17386 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17387 LD->isVolatile())
17388 return false;
17389
17390 // We're looking for a sequence like this:
17391 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17392 // t16: i64 = srl t13, Constant:i32<32>
17393 // t17: i32 = truncate t16
17394 // t18: f32 = bitcast t17
17395 // t19: i32 = truncate t13
17396 // t20: f32 = bitcast t19
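// When this matches, the i64 load is replaced by two f32 loads from [ptr]
// and [ptr+4] (e.g. a pair of lfs instructions), with endianness deciding
// which of the two loads feeds which bitcast.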
17397
17398 if (!LD->hasNUsesOfValue(2, 0))
17399 return false;
17400
17401 auto UI = LD->user_begin();
17402 while (UI.getUse().getResNo() != 0) ++UI;
17403 SDNode *Trunc = *UI++;
17404 while (UI.getUse().getResNo() != 0) ++UI;
17405 SDNode *RightShift = *UI;
17406 if (Trunc->getOpcode() != ISD::TRUNCATE)
17407 std::swap(Trunc, RightShift);
17408
17409 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17410 Trunc->getValueType(0) != MVT::i32 ||
17411 !Trunc->hasOneUse())
17412 return false;
17413 if (RightShift->getOpcode() != ISD::SRL ||
17414 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
17415 RightShift->getConstantOperandVal(1) != 32 ||
17416 !RightShift->hasOneUse())
17417 return false;
17418
17419 SDNode *Trunc2 = *RightShift->user_begin();
17420 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17421 Trunc2->getValueType(0) != MVT::i32 ||
17422 !Trunc2->hasOneUse())
17423 return false;
17424
17425 SDNode *Bitcast = *Trunc->user_begin();
17426 SDNode *Bitcast2 = *Trunc2->user_begin();
17427
17428 if (Bitcast->getOpcode() != ISD::BITCAST ||
17429 Bitcast->getValueType(0) != MVT::f32)
17430 return false;
17431 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17432 Bitcast2->getValueType(0) != MVT::f32)
17433 return false;
17434
17435 if (Subtarget.isLittleEndian())
17436 std::swap(Bitcast, Bitcast2);
17437
17438 // Bitcast has the second float (in memory-layout order) and Bitcast2
17439 // has the first one.
17440
17441 SDValue BasePtr = LD->getBasePtr();
17442 if (LD->isIndexed()) {
17443 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17444 "Non-pre-inc AM on PPC?");
17445 BasePtr =
17446 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17447 LD->getOffset());
17448 }
17449
17450 auto MMOFlags =
17451 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17452 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17453 LD->getPointerInfo(), LD->getAlign(),
17454 MMOFlags, LD->getAAInfo());
17455 SDValue AddPtr =
17456 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17457 BasePtr, DAG.getIntPtrConstant(4, dl));
17458 SDValue FloatLoad2 = DAG.getLoad(
17459 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17460 LD->getPointerInfo().getWithOffset(4),
17461 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17462
17463 if (LD->isIndexed()) {
17464 // Note that DAGCombine should re-form any pre-increment load(s) from
17465 // what is produced here if that makes sense.
17466 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17467 }
17468
17469 DCI.CombineTo(Bitcast2, FloatLoad);
17470 DCI.CombineTo(Bitcast, FloatLoad2);
17471
17472 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17473 SDValue(FloatLoad2.getNode(), 1));
17474 return true;
17475 };
17476
17477 if (ReplaceTwoFloatLoad())
17478 return SDValue(N, 0);
17479
17480 EVT MemVT = LD->getMemoryVT();
17481 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17482 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17483 if (LD->isUnindexed() && VT.isVector() &&
17484 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17485 // P8 and later hardware should just use LOAD.
17486 !Subtarget.hasP8Vector() &&
17487 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17488 VT == MVT::v4f32))) &&
17489 LD->getAlign() < ABIAlignment) {
17490 // This is a type-legal unaligned Altivec load.
17491 SDValue Chain = LD->getChain();
17492 SDValue Ptr = LD->getBasePtr();
17493 bool isLittleEndian = Subtarget.isLittleEndian();
17494
17495 // This implements the loading of unaligned vectors as described in
17496 // the venerable Apple Velocity Engine overview. Specifically:
17497 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17498 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17499 //
17500 // The general idea is to expand a sequence of one or more unaligned
17501 // loads into an alignment-based permutation-control instruction (lvsl
17502 // or lvsr), a series of regular vector loads (which always truncate
17503 // their input address to an aligned address), and a series of
17504 // permutations. The results of these permutations are the requested
17505 // loaded values. The trick is that the last "extra" load is not taken
17506 // from the address you might suspect (sizeof(vector) bytes after the
17507 // last requested load), but rather sizeof(vector) - 1 bytes after the
17508 // last requested vector. The point of this is to avoid a page fault if
17509 // the base address happened to be aligned. This works because if the
17510 // base address is aligned, then adding less than a full vector length
17511 // will cause the last vector in the sequence to be (re)loaded.
17512 // Otherwise, the next vector will be fetched from the address you would
17513 // expect.
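// On a big-endian subtarget the expansion is essentially the classic
// sequence (sketched, not verbatim output):
//   lvsl  vMask, 0, rPtr       ; permute control from the low address bits
//   lvx   vLo,   0, rPtr       ; lvx ignores the low 4 bits of the address
//   lvx   vHi, rOff, rPtr      ; rOff = 15, i.e. sizeof(vector) - 1
//   vperm vResult, vLo, vHi, vMask
// On little endian, lvsr is used instead and the two vperm inputs are
// swapped, as handled further below.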
17514
17515 // We might be able to reuse the permutation generation from
17516 // a different base address offset from this one by an aligned amount.
17517 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17518 // optimization later.
17519 Intrinsic::ID Intr, IntrLD, IntrPerm;
17520 MVT PermCntlTy, PermTy, LDTy;
17521 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17522 : Intrinsic::ppc_altivec_lvsl;
17523 IntrLD = Intrinsic::ppc_altivec_lvx;
17524 IntrPerm = Intrinsic::ppc_altivec_vperm;
17525 PermCntlTy = MVT::v16i8;
17526 PermTy = MVT::v4i32;
17527 LDTy = MVT::v4i32;
17528
17529 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
17530
17531 // Create the new MMO for the new base load. It is like the original MMO,
17532 // but represents an area in memory almost twice the vector size centered
17533 // on the original address. If the address is unaligned, we might start
17534 // reading up to (sizeof(vector)-1) bytes below the address of the
17535 // original unaligned load.
17537 MachineMemOperand *BaseMMO =
17538 MF.getMachineMemOperand(LD->getMemOperand(),
17539 -(int64_t)MemVT.getStoreSize()+1,
17540 2*MemVT.getStoreSize()-1);
17541
17542 // Create the new base load.
17543 SDValue LDXIntID =
17544 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
17545 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17546 SDValue BaseLoad =
17548 DAG.getVTList(PermTy, MVT::Other),
17549 BaseLoadOps, LDTy, BaseMMO);
17550
17551 // Note that the value of IncOffset (which is provided to the next
17552 // load's pointer info offset value, and thus used to calculate the
17553 // alignment), and the value of IncValue (which is actually used to
17554 // increment the pointer value) are different! This is because we
17555 // require the next load to appear to be aligned, even though it
17556 // is actually offset from the base pointer by a lesser amount.
17557 int IncOffset = VT.getSizeInBits() / 8;
17558 int IncValue = IncOffset;
17559
17560 // Walk (both up and down) the chain looking for another load at the real
17561 // (aligned) offset (the alignment of the other load does not matter in
17562 // this case). If found, then do not use the offset reduction trick, as
17563 // that will prevent the loads from being later combined (as they would
17564 // otherwise be duplicates).
17565 if (!findConsecutiveLoad(LD, DAG))
17566 --IncValue;
17567
17569 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
17570 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
17571
17572 MachineMemOperand *ExtraMMO =
17573 MF.getMachineMemOperand(LD->getMemOperand(),
17574 1, 2*MemVT.getStoreSize()-1);
17575 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17576 SDValue ExtraLoad =
17578 DAG.getVTList(PermTy, MVT::Other),
17579 ExtraLoadOps, LDTy, ExtraMMO);
17580
17581 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17582 BaseLoad.getValue(1), ExtraLoad.getValue(1));
17583
17584 // Because vperm has a big-endian bias, we must reverse the order
17585 // of the input vectors and complement the permute control vector
17586 // when generating little endian code. We have already handled the
17587 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17588 // and ExtraLoad here.
17589 SDValue Perm;
17590 if (isLittleEndian)
17591 Perm = BuildIntrinsicOp(IntrPerm,
17592 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
17593 else
17594 Perm = BuildIntrinsicOp(IntrPerm,
17595 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
17596
17597 if (VT != PermTy)
17598 Perm = Subtarget.hasAltivec()
17599 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
17600 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
17601 DAG.getTargetConstant(1, dl, MVT::i64));
17602 // second argument is 1 because this rounding
17603 // is always exact.
17604
17605 // The output of the permutation is our loaded result; the TokenFactor is
17606 // our new chain.
17607 DCI.CombineTo(N, Perm, TF);
17608 return SDValue(N, 0);
17609 }
17610 }
17611 break;
17612 case ISD::INTRINSIC_WO_CHAIN: {
17613 bool isLittleEndian = Subtarget.isLittleEndian();
17614 unsigned IID = N->getConstantOperandVal(0);
17615 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17616 : Intrinsic::ppc_altivec_lvsl);
17617 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
17618 SDValue Add = N->getOperand(1);
17619
17620 int Bits = 4 /* 16 byte alignment */;
17621
17622 if (DAG.MaskedValueIsZero(Add->getOperand(1),
17623 APInt::getAllOnes(Bits /* alignment */)
17624 .zext(Add.getScalarValueSizeInBits()))) {
17625 SDNode *BasePtr = Add->getOperand(0).getNode();
17626 for (SDNode *U : BasePtr->users()) {
17627 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17628 U->getConstantOperandVal(0) == IID) {
17629 // We've found another LVSL/LVSR, and this address is an aligned
17630 // multiple of that one. The results will be the same, so use the
17631 // one we've just found instead.
17632
17633 return SDValue(U, 0);
17634 }
17635 }
17636 }
17637
17638 if (isa<ConstantSDNode>(Add->getOperand(1))) {
17639 SDNode *BasePtr = Add->getOperand(0).getNode();
17640 for (SDNode *U : BasePtr->users()) {
17641 if (U->getOpcode() == ISD::ADD &&
17642 isa<ConstantSDNode>(U->getOperand(1)) &&
17643 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
17644 (1ULL << Bits) ==
17645 0) {
17646 SDNode *OtherAdd = U;
17647 for (SDNode *V : OtherAdd->users()) {
17648 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17649 V->getConstantOperandVal(0) == IID) {
17650 return SDValue(V, 0);
17651 }
17652 }
17653 }
17654 }
17655 }
17656 }
17657
17658 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
17659 // Expose the vabsduw/h/b opportunity for downstream combines.
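// For example, vmaxsw(vsubuwm(0, a), a) computes max(-a, a) == |a| for each
// element, so the whole pattern collapses to ISD::ABS on the vector.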
17660 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17661 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17662 IID == Intrinsic::ppc_altivec_vmaxsh ||
17663 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17664 SDValue V1 = N->getOperand(1);
17665 SDValue V2 = N->getOperand(2);
17666 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17667 V1.getSimpleValueType() == MVT::v8i16 ||
17668 V1.getSimpleValueType() == MVT::v16i8) &&
17670 // (0-a, a)
17671 if (V1.getOpcode() == ISD::SUB &&
17673 V1.getOperand(1) == V2) {
17674 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
17675 }
17676 // (a, 0-a)
17677 if (V2.getOpcode() == ISD::SUB &&
17679 V2.getOperand(1) == V1) {
17680 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17681 }
17682 // (x-y, y-x)
17683 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17684 V1.getOperand(0) == V2.getOperand(1) &&
17685 V1.getOperand(1) == V2.getOperand(0)) {
17686 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17687 }
17688 }
17689 }
17690 }
17691
17692 break;
17693 case ISD::INTRINSIC_W_CHAIN:
17694 switch (N->getConstantOperandVal(1)) {
17695 default:
17696 break;
17697 case Intrinsic::ppc_altivec_vsum4sbs:
17698 case Intrinsic::ppc_altivec_vsum4shs:
17699 case Intrinsic::ppc_altivec_vsum4ubs: {
17700 // These sum-across intrinsics only have a chain due to the side effect
17701 // that they may set the SAT bit. If we know the SAT bit will not be set
17702 // for some inputs, we can replace any uses of their chain with the
17703 // input chain.
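// For example, vsum4sbs adds the sum of four signed bytes (magnitude at most
// 512) to each word of the accumulator operand; with a zero-splat accumulator
// the result always fits in a signed word, so the SAT bit can never be set
// and the chain dependency is dead.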
17704 if (BuildVectorSDNode *BVN =
17705 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
17706 APInt APSplatBits, APSplatUndef;
17707 unsigned SplatBitSize;
17708 bool HasAnyUndefs;
17709 bool BVNIsConstantSplat = BVN->isConstantSplat(
17710 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
17711 !Subtarget.isLittleEndian());
17712 // If the constant splat vector is 0, the SAT bit will not be set.
17713 if (BVNIsConstantSplat && APSplatBits == 0)
17714 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
17715 }
17716 return SDValue();
17717 }
17718 case Intrinsic::ppc_vsx_lxvw4x:
17719 case Intrinsic::ppc_vsx_lxvd2x:
17720 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17721 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17722 if (Subtarget.needsSwapsForVSXMemOps())
17723 return expandVSXLoadForLE(N, DCI);
17724 break;
17725 }
17726 break;
17727 case ISD::INTRINSIC_VOID:
17728 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17729 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17730 if (Subtarget.needsSwapsForVSXMemOps()) {
17731 switch (N->getConstantOperandVal(1)) {
17732 default:
17733 break;
17734 case Intrinsic::ppc_vsx_stxvw4x:
17735 case Intrinsic::ppc_vsx_stxvd2x:
17736 return expandVSXStoreForLE(N, DCI);
17737 }
17738 }
17739 break;
17740 case ISD::BSWAP: {
17741 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17742 // For subtargets without LDBRX, we can still do better than the default
17743 // expansion even for 64-bit BSWAP (LOAD).
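// For example, (bswap (load i32 ptr)) becomes a single lwbrx and the i16
// form becomes lhbrx; the byte-reversed load makes a separate bswap
// unnecessary.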
17744 bool Is64BitBswapOn64BitTgt =
17745 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
17746 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
17747 N->getOperand(0).hasOneUse();
17748 if (IsSingleUseNormalLd &&
17749 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
17750 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17751 SDValue Load = N->getOperand(0);
17752 LoadSDNode *LD = cast<LoadSDNode>(Load);
17753 // Create the byte-swapping load.
17754 SDValue Ops[] = {
17755 LD->getChain(), // Chain
17756 LD->getBasePtr(), // Ptr
17757 DAG.getValueType(N->getValueType(0)) // VT
17758 };
17759 SDValue BSLoad =
17760 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
17761 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
17762 MVT::i64 : MVT::i32, MVT::Other),
17763 Ops, LD->getMemoryVT(), LD->getMemOperand());
17764
17765 // If this is an i16 load, insert the truncate.
17766 SDValue ResVal = BSLoad;
17767 if (N->getValueType(0) == MVT::i16)
17768 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
17769
17770 // First, combine the bswap away. This makes the value produced by the
17771 // load dead.
17772 DCI.CombineTo(N, ResVal);
17773
17774 // Next, combine the load away; we give it a bogus result value but a real
17775 // chain result. The result value is dead because the bswap is dead.
17776 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
17777
17778 // Return N so it doesn't get rechecked!
17779 return SDValue(N, 0);
17780 }
17781 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
17782 // before legalization so that the BUILD_PAIR is handled correctly.
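// i.e. the i64 (bswap (load)) is split into two i32 loads at offsets 0 and
// 4, each byte-swapped (later selectable as lwbrx), and the halves are then
// recombined with a BUILD_PAIR whose operand order depends on endianness.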
17783 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
17784 !IsSingleUseNormalLd)
17785 return SDValue();
17786 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
17787
17788 // Can't split volatile or atomic loads.
17789 if (!LD->isSimple())
17790 return SDValue();
17791 SDValue BasePtr = LD->getBasePtr();
17792 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
17793 LD->getPointerInfo(), LD->getAlign());
17794 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
17795 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17796 DAG.getIntPtrConstant(4, dl));
17798 LD->getMemOperand(), 4, 4);
17799 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
17800 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
17801 SDValue Res;
17802 if (Subtarget.isLittleEndian())
17803 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
17804 else
17805 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
17806 SDValue TF =
17807 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17808 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
17809 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
17810 return Res;
17811 }
17812 case PPCISD::VCMP:
17813 // If a VCMP_rec node already exists with exactly the same operands as this
17814 // node, use its result instead of this node (VCMP_rec computes both a CR6
17815 // and a normal output).
17816 //
17817 if (!N->getOperand(0).hasOneUse() &&
17818 !N->getOperand(1).hasOneUse() &&
17819 !N->getOperand(2).hasOneUse()) {
17820
17821 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
17822 SDNode *VCMPrecNode = nullptr;
17823
17824 SDNode *LHSN = N->getOperand(0).getNode();
17825 for (SDNode *User : LHSN->users())
17826 if (User->getOpcode() == PPCISD::VCMP_rec &&
17827 User->getOperand(1) == N->getOperand(1) &&
17828 User->getOperand(2) == N->getOperand(2) &&
17829 User->getOperand(0) == N->getOperand(0)) {
17830 VCMPrecNode = User;
17831 break;
17832 }
17833
17834 // If there is no VCMP_rec node, or if the flag value has a single use,
17835 // don't transform this.
17836 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
17837 break;
17838
17839 // Look at the (necessarily single) use of the flag value. If it has a
17840 // chain, this transformation is more complex. Note that multiple things
17841 // could use the value result, which we should ignore.
17842 SDNode *FlagUser = nullptr;
17843 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
17844 FlagUser == nullptr; ++UI) {
17845 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
17846 SDNode *User = UI->getUser();
17847 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
17848 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
17849 FlagUser = User;
17850 break;
17851 }
17852 }
17853 }
17854
17855 // If the user is an MFOCRF instruction, we know this is safe.
17856 // Otherwise, we give up for now.
17857 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
17858 return SDValue(VCMPrecNode, 0);
17859 }
17860 break;
17861 case ISD::BR_CC: {
17862 // If this is a branch on an altivec predicate comparison, lower this so
17863 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
17864 // lowering is done pre-legalize, because the legalizer lowers the predicate
17865 // compare down to code that is difficult to reassemble.
17866 // This code also handles branches that depend on the result of a store
17867 // conditional.
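// For example, a BR_CC comparing the result of vcmpeqfp_p against zero
// becomes a VCMP_rec (the record-form vcmpeqfp.) plus a COND_BRANCH on the
// appropriate bit of CR6, avoiding an mfocrf of the predicate result.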
17868 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
17869 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
17870
17871 int CompareOpc;
17872 bool isDot;
17873
17874 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
17875 break;
17876
17877 // Since we are doing this pre-legalize, the RHS can be a constant of
17878 // arbitrary bitwidth which may cause issues when trying to get the value
17879 // from the underlying APInt.
17880 auto RHSAPInt = RHS->getAsAPIntVal();
17881 if (!RHSAPInt.isIntN(64))
17882 break;
17883
17884 unsigned Val = RHSAPInt.getZExtValue();
17885 auto isImpossibleCompare = [&]() {
17886 // If this is a comparison against something other than 0/1, then we know
17887 // that the condition is never/always true.
17888 if (Val != 0 && Val != 1) {
17889 if (CC == ISD::SETEQ) // Cond never true, remove branch.
17890 return N->getOperand(0);
17891 // Always !=, turn it into an unconditional branch.
17892 return DAG.getNode(ISD::BR, dl, MVT::Other,
17893 N->getOperand(0), N->getOperand(4));
17894 }
17895 return SDValue();
17896 };
17897 // Combine branches fed by store conditional instructions (st[bhwd]cx).
17898 unsigned StoreWidth = 0;
17899 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
17900 isStoreConditional(LHS, StoreWidth)) {
17901 if (SDValue Impossible = isImpossibleCompare())
17902 return Impossible;
17903 PPC::Predicate CompOpc;
17904 // eq 0 => ne
17905 // ne 0 => eq
17906 // eq 1 => eq
17907 // ne 1 => ne
17908 if (Val == 0)
17909 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
17910 else
17911 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
17912
17913 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
17914 DAG.getConstant(StoreWidth, dl, MVT::i32)};
17915 auto *MemNode = cast<MemSDNode>(LHS);
17916 SDValue ConstSt = DAG.getMemIntrinsicNode(
17917 PPCISD::STORE_COND, dl,
17918 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
17919 MemNode->getMemoryVT(), MemNode->getMemOperand());
17920
17921 SDValue InChain;
17922 // Unchain the branch from the original store conditional.
17923 if (N->getOperand(0) == LHS.getValue(1))
17924 InChain = LHS.getOperand(0);
17925 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
17926 SmallVector<SDValue, 4> InChains;
17927 SDValue InTF = N->getOperand(0);
17928 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
17929 if (InTF.getOperand(i) != LHS.getValue(1))
17930 InChains.push_back(InTF.getOperand(i));
17931 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
17932 }
17933
17934 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
17935 DAG.getConstant(CompOpc, dl, MVT::i32),
17936 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
17937 ConstSt.getValue(2));
17938 }
17939
17940 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17941 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
17942 assert(isDot && "Can't compare against a vector result!");
17943
17944 if (SDValue Impossible = isImpossibleCompare())
17945 return Impossible;
17946
17947 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
17948 // Create the PPCISD altivec 'dot' comparison node.
17949 SDValue Ops[] = {
17950 LHS.getOperand(2), // LHS of compare
17951 LHS.getOperand(3), // RHS of compare
17952 DAG.getConstant(CompareOpc, dl, MVT::i32)
17953 };
17954 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
17955 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
17956
17957 // Unpack the result based on how the target uses it.
17958 PPC::Predicate CompOpc;
17959 switch (LHS.getConstantOperandVal(1)) {
17960 default: // Can't happen, don't crash on invalid number though.
17961 case 0: // Branch on the value of the EQ bit of CR6.
17962 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
17963 break;
17964 case 1: // Branch on the inverted value of the EQ bit of CR6.
17965 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
17966 break;
17967 case 2: // Branch on the value of the LT bit of CR6.
17968 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
17969 break;
17970 case 3: // Branch on the inverted value of the LT bit of CR6.
17971 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
17972 break;
17973 }
17974
17975 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
17976 DAG.getConstant(CompOpc, dl, MVT::i32),
17977 DAG.getRegister(PPC::CR6, MVT::i32),
17978 N->getOperand(4), CompNode.getValue(1));
17979 }
17980 break;
17981 }
17982 case ISD::BUILD_VECTOR:
17983 return DAGCombineBuildVector(N, DCI);
17984 case PPCISD::ADDC:
17985 return DAGCombineAddc(N, DCI);
17986 }
17987
17988 return SDValue();
17989}
17990
17991SDValue
17992PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
17993 SelectionDAG &DAG,
17994 SmallVectorImpl<SDNode *> &Created) const {
17995 // fold (sdiv X, pow2)
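// A sketch of the intended lowering for a positive power of two (the exact
// instructions depend on the type and subtarget):
//   x / 8  ->  srawi r, x, 3   ; CA is set iff x is negative and nonzero
//                              ;   bits were shifted out
//              addze r, r      ; add the carry to round the quotient toward
//                              ;   zero
// For a negated power of two the result is additionally subtracted from zero
// (see IsNegPow2 below).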
17996 EVT VT = N->getValueType(0);
17997 if (VT == MVT::i64 && !Subtarget.isPPC64())
17998 return SDValue();
17999 if ((VT != MVT::i32 && VT != MVT::i64) ||
18000 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
18001 return SDValue();
18002
18003 SDLoc DL(N);
18004 SDValue N0 = N->getOperand(0);
18005
18006 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
18007 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
18008 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
18009
18010 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
18011 Created.push_back(Op.getNode());
18012
18013 if (IsNegPow2) {
18014 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
18015 Created.push_back(Op.getNode());
18016 }
18017
18018 return Op;
18019}
18020
18021//===----------------------------------------------------------------------===//
18022// Inline Assembly Support
18023//===----------------------------------------------------------------------===//
18024
18025void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
18026 KnownBits &Known,
18027 const APInt &DemandedElts,
18028 const SelectionDAG &DAG,
18029 unsigned Depth) const {
18030 Known.resetAll();
18031 switch (Op.getOpcode()) {
18032 default: break;
18033 case PPCISD::LBRX: {
18034 // lhbrx is known to have the top bits cleared out.
18035 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
18036 Known.Zero = 0xFFFF0000;
18037 break;
18038 }
18039 case PPCISD::ADDE: {
18040 if (Op.getResNo() == 0) {
18041 // (0|1), _ = ADDE 0, 0, CARRY
18042 SDValue LHS = Op.getOperand(0);
18043 SDValue RHS = Op.getOperand(1);
18044 if (isNullConstant(LHS) && isNullConstant(RHS))
18045 Known.Zero = ~1ULL;
18046 }
18047 break;
18048 }
18049 case ISD::INTRINSIC_WO_CHAIN: {
18050 switch (Op.getConstantOperandVal(0)) {
18051 default: break;
18052 case Intrinsic::ppc_altivec_vcmpbfp_p:
18053 case Intrinsic::ppc_altivec_vcmpeqfp_p:
18054 case Intrinsic::ppc_altivec_vcmpequb_p:
18055 case Intrinsic::ppc_altivec_vcmpequh_p:
18056 case Intrinsic::ppc_altivec_vcmpequw_p:
18057 case Intrinsic::ppc_altivec_vcmpequd_p:
18058 case Intrinsic::ppc_altivec_vcmpequq_p:
18059 case Intrinsic::ppc_altivec_vcmpgefp_p:
18060 case Intrinsic::ppc_altivec_vcmpgtfp_p:
18061 case Intrinsic::ppc_altivec_vcmpgtsb_p:
18062 case Intrinsic::ppc_altivec_vcmpgtsh_p:
18063 case Intrinsic::ppc_altivec_vcmpgtsw_p:
18064 case Intrinsic::ppc_altivec_vcmpgtsd_p:
18065 case Intrinsic::ppc_altivec_vcmpgtsq_p:
18066 case Intrinsic::ppc_altivec_vcmpgtub_p:
18067 case Intrinsic::ppc_altivec_vcmpgtuh_p:
18068 case Intrinsic::ppc_altivec_vcmpgtuw_p:
18069 case Intrinsic::ppc_altivec_vcmpgtud_p:
18070 case Intrinsic::ppc_altivec_vcmpgtuq_p:
18071 Known.Zero = ~1U; // All bits but the low one are known to be zero.
18072 break;
18073 }
18074 break;
18075 }
18076 case ISD::INTRINSIC_W_CHAIN: {
18077 switch (Op.getConstantOperandVal(1)) {
18078 default:
18079 break;
18080 case Intrinsic::ppc_load2r:
18081 // Top bits are cleared for load2r (which is the same as lhbrx).
18082 Known.Zero = 0xFFFF0000;
18083 break;
18084 }
18085 break;
18086 }
18087 }
18088}
18089
18090Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
18091 switch (Subtarget.getCPUDirective()) {
18092 default: break;
18093 case PPC::DIR_970:
18094 case PPC::DIR_PWR4:
18095 case PPC::DIR_PWR5:
18096 case PPC::DIR_PWR5X:
18097 case PPC::DIR_PWR6:
18098 case PPC::DIR_PWR6X:
18099 case PPC::DIR_PWR7:
18100 case PPC::DIR_PWR8:
18101 case PPC::DIR_PWR9:
18102 case PPC::DIR_PWR10:
18103 case PPC::DIR_PWR11:
18104 case PPC::DIR_PWR_FUTURE: {
18105 if (!ML)
18106 break;
18107
18109 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
18110 // so that we can decrease cache misses and branch-prediction misses.
18111 // Actual alignment of the loop will depend on the hotness check and other
18112 // logic in alignBlocks.
18113 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
18114 return Align(32);
18115 }
18116
18117 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
18118
18119 // For small loops (between 5 and 8 instructions), align to a 32-byte
18120 // boundary so that the entire loop fits in one instruction-cache line.
18121 uint64_t LoopSize = 0;
18122 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
18123 for (const MachineInstr &J : **I) {
18124 LoopSize += TII->getInstSizeInBytes(J);
18125 if (LoopSize > 32)
18126 break;
18127 }
18128
18129 if (LoopSize > 16 && LoopSize <= 32)
18130 return Align(32);
18131
18132 break;
18133 }
18134 }
18135
18136 return TargetLowering::getPrefLoopAlignment(ML);
18137}
18138
18139/// getConstraintType - Given a constraint, return the type of
18140/// constraint it is for this target.
18141PPCTargetLowering::ConstraintType
18142PPCTargetLowering::getConstraintType(StringRef Constraint) const {
18143 if (Constraint.size() == 1) {
18144 switch (Constraint[0]) {
18145 default: break;
18146 case 'b':
18147 case 'r':
18148 case 'f':
18149 case 'd':
18150 case 'v':
18151 case 'y':
18152 return C_RegisterClass;
18153 case 'Z':
18154 // FIXME: While Z does indicate a memory constraint, it specifically
18155 // indicates an r+r address (used in conjunction with the 'y' modifier
18156 // in the replacement string). Currently, we're forcing the base
18157 // register to be r0 in the asm printer (which is interpreted as zero)
18158 // and forming the complete address in the second register. This is
18159 // suboptimal.
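// A typical inline-asm use pairs 'Z' with the 'y' operand modifier, e.g.
// something like:
//   asm("lwbrx %0, %y1" : "=r"(v) : "Z"(*p));
// so that the compiler forms the indexed (r+r) address for the operand.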
18160 return C_Memory;
18161 }
18162 } else if (Constraint == "wc") { // individual CR bits.
18163 return C_RegisterClass;
18164 } else if (Constraint == "wa" || Constraint == "wd" ||
18165 Constraint == "wf" || Constraint == "ws" ||
18166 Constraint == "wi" || Constraint == "ww") {
18167 return C_RegisterClass; // VSX registers.
18168 }
18169 return TargetLowering::getConstraintType(Constraint);
18170}
18171
18172/// Examine constraint type and operand type and determine a weight value.
18173/// This object must already have been set up with the operand type
18174/// and the current alternative constraint selected.
18177 AsmOperandInfo &info, const char *constraint) const {
18179 Value *CallOperandVal = info.CallOperandVal;
18180 // If we don't have a value, we can't do a match,
18181 // but allow it at the lowest weight.
18182 if (!CallOperandVal)
18183 return CW_Default;
18184 Type *type = CallOperandVal->getType();
18185
18186 // Look at the constraint type.
18187 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
18188 return CW_Register; // an individual CR bit.
18189 else if ((StringRef(constraint) == "wa" ||
18190 StringRef(constraint) == "wd" ||
18191 StringRef(constraint) == "wf") &&
18192 type->isVectorTy())
18193 return CW_Register;
18194 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
18195 return CW_Register; // 'wi' holds 64-bit integer data.
18196 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
18197 return CW_Register;
18198 else if (StringRef(constraint) == "ww" && type->isFloatTy())
18199 return CW_Register;
18200
18201 switch (*constraint) {
18202 default:
18204 break;
18205 case 'b':
18206 if (type->isIntegerTy())
18207 weight = CW_Register;
18208 break;
18209 case 'f':
18210 if (type->isFloatTy())
18211 weight = CW_Register;
18212 break;
18213 case 'd':
18214 if (type->isDoubleTy())
18215 weight = CW_Register;
18216 break;
18217 case 'v':
18218 if (type->isVectorTy())
18219 weight = CW_Register;
18220 break;
18221 case 'y':
18222 weight = CW_Register;
18223 break;
18224 case 'Z':
18225 weight = CW_Memory;
18226 break;
18227 }
18228 return weight;
18229}
18230
18231std::pair<unsigned, const TargetRegisterClass *>
18233 StringRef Constraint,
18234 MVT VT) const {
18235 if (Constraint.size() == 1) {
18236 // GCC RS6000 Constraint Letters
18237 switch (Constraint[0]) {
18238 case 'b': // R1-R31
18239 if (VT == MVT::i64 && Subtarget.isPPC64())
18240 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
18241 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
18242 case 'r': // R0-R31
18243 if (VT == MVT::i64 && Subtarget.isPPC64())
18244 return std::make_pair(0U, &PPC::G8RCRegClass);
18245 return std::make_pair(0U, &PPC::GPRCRegClass);
18246 // 'd' and 'f' constraints are both defined to be "the floating point
18247 // registers", where one is for 32-bit and the other for 64-bit. We don't
18248 // really care overly much here so just give them all the same reg classes.
18249 case 'd':
18250 case 'f':
18251 if (Subtarget.hasSPE()) {
18252 if (VT == MVT::f32 || VT == MVT::i32)
18253 return std::make_pair(0U, &PPC::GPRCRegClass);
18254 if (VT == MVT::f64 || VT == MVT::i64)
18255 return std::make_pair(0U, &PPC::SPERCRegClass);
18256 } else {
18257 if (VT == MVT::f32 || VT == MVT::i32)
18258 return std::make_pair(0U, &PPC::F4RCRegClass);
18259 if (VT == MVT::f64 || VT == MVT::i64)
18260 return std::make_pair(0U, &PPC::F8RCRegClass);
18261 }
18262 break;
18263 case 'v':
18264 if (Subtarget.hasAltivec() && VT.isVector())
18265 return std::make_pair(0U, &PPC::VRRCRegClass);
18266 else if (Subtarget.hasVSX())
18267 // Scalars in Altivec registers only make sense with VSX.
18268 return std::make_pair(0U, &PPC::VFRCRegClass);
18269 break;
18270 case 'y': // crrc
18271 return std::make_pair(0U, &PPC::CRRCRegClass);
18272 }
18273 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
18274 // An individual CR bit.
18275 return std::make_pair(0U, &PPC::CRBITRCRegClass);
18276 } else if ((Constraint == "wa" || Constraint == "wd" ||
18277 Constraint == "wf" || Constraint == "wi") &&
18278 Subtarget.hasVSX()) {
18279 // A VSX register for either a scalar (FP) or vector. There is no
18280 // support for single precision scalars on subtargets prior to Power8.
18281 if (VT.isVector())
18282 return std::make_pair(0U, &PPC::VSRCRegClass);
18283 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18284 return std::make_pair(0U, &PPC::VSSRCRegClass);
18285 return std::make_pair(0U, &PPC::VSFRCRegClass);
18286 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
18287 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18288 return std::make_pair(0U, &PPC::VSSRCRegClass);
18289 else
18290 return std::make_pair(0U, &PPC::VSFRCRegClass);
18291 } else if (Constraint == "lr") {
18292 if (VT == MVT::i64)
18293 return std::make_pair(0U, &PPC::LR8RCRegClass);
18294 else
18295 return std::make_pair(0U, &PPC::LRRCRegClass);
18296 }
18297
18298 // Handle special cases of physical registers that are not properly handled
18299 // by the base class.
18300 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
18301 // If we name a VSX register, we can't defer to the base class because it
18302 // will not recognize the correct register (their names will be VSL{0-31}
18303 // and V{0-31} so they won't match). So we match them here.
18304 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
18305 int VSNum = atoi(Constraint.data() + 3);
18306 assert(VSNum >= 0 && VSNum <= 63 &&
18307 "Attempted to access a vsr out of range");
18308 if (VSNum < 32)
18309 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
18310 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
18311 }
18312
18313 // For float registers, we can't defer to the base class as it will match
18314 // the SPILLTOVSRRC class.
18315 if (Constraint.size() > 3 && Constraint[1] == 'f') {
18316 int RegNum = atoi(Constraint.data() + 2);
18317 if (RegNum > 31 || RegNum < 0)
18318 report_fatal_error("Invalid floating point register number");
18319 if (VT == MVT::f32 || VT == MVT::i32)
18320 return Subtarget.hasSPE()
18321 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
18322 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
18323 if (VT == MVT::f64 || VT == MVT::i64)
18324 return Subtarget.hasSPE()
18325 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
18326 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
18327 }
18328 }
18329
18330 std::pair<unsigned, const TargetRegisterClass *> R =
18332
18333 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
18334 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
18335 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
18336 // register.
18337 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
18338 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
18339 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
18340 PPC::GPRCRegClass.contains(R.first))
18341 return std::make_pair(TRI->getMatchingSuperReg(R.first,
18342 PPC::sub_32, &PPC::G8RCRegClass),
18343 &PPC::G8RCRegClass);
18344
18345 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
18346 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
18347 R.first = PPC::CR0;
18348 R.second = &PPC::CRRCRegClass;
18349 }
18350 // FIXME: This warning should ideally be emitted in the front end.
18351 const auto &TM = getTargetMachine();
18352 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
18353 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
18354 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
18355 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
18356 errs() << "warning: vector registers 20 to 32 are reserved in the "
18357 "default AIX AltiVec ABI and cannot be used\n";
18358 }
18359
18360 return R;
18361}
18362
18363/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18364/// vector. If it is invalid, don't add anything to Ops.
18366 StringRef Constraint,
18367 std::vector<SDValue> &Ops,
18368 SelectionDAG &DAG) const {
18369 SDValue Result;
18370
18371 // Only support length 1 constraints.
18372 if (Constraint.size() > 1)
18373 return;
18374
18375 char Letter = Constraint[0];
18376 switch (Letter) {
18377 default: break;
18378 case 'I':
18379 case 'J':
18380 case 'K':
18381 case 'L':
18382 case 'M':
18383 case 'N':
18384 case 'O':
18385 case 'P': {
18386 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
18387 if (!CST) return; // Must be an immediate to match.
18388 SDLoc dl(Op);
18389 int64_t Value = CST->getSExtValue();
18390 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
18391 // numbers are printed as such.
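// For example, an operand such as "I"(imm) in
//   asm("addi %0,%1,%2" : "=r"(d) : "r"(s), "I"(imm))
// is only accepted if imm passes the signed 16-bit check below, matching the
// addi immediate field.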
18392 switch (Letter) {
18393 default: llvm_unreachable("Unknown constraint letter!");
18394 case 'I': // "I" is a signed 16-bit constant.
18395 if (isInt<16>(Value))
18396 Result = DAG.getTargetConstant(Value, dl, TCVT);
18397 break;
18398 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
18400 Result = DAG.getTargetConstant(Value, dl, TCVT);
18401 break;
18402 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
18404 Result = DAG.getTargetConstant(Value, dl, TCVT);
18405 break;
18406 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
18407 if (isUInt<16>(Value))
18408 Result = DAG.getTargetConstant(Value, dl, TCVT);
18409 break;
18410 case 'M': // "M" is a constant that is greater than 31.
18411 if (Value > 31)
18412 Result = DAG.getTargetConstant(Value, dl, TCVT);
18413 break;
18414 case 'N': // "N" is a positive constant that is an exact power of two.
18415 if (Value > 0 && isPowerOf2_64(Value))
18416 Result = DAG.getTargetConstant(Value, dl, TCVT);
18417 break;
18418 case 'O': // "O" is the constant zero.
18419 if (Value == 0)
18420 Result = DAG.getTargetConstant(Value, dl, TCVT);
18421 break;
18422 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
18423 if (isInt<16>(-Value))
18424 Result = DAG.getTargetConstant(Value, dl, TCVT);
18425 break;
18426 }
18427 break;
18428 }
18429 }
18430
18431 if (Result.getNode()) {
18432 Ops.push_back(Result);
18433 return;
18434 }
18435
18436 // Handle standard constraint letters.
18438}
18439
18442 SelectionDAG &DAG) const {
18443 if (I.getNumOperands() <= 1)
18444 return;
18445 if (!isa<ConstantSDNode>(Ops[1].getNode()))
18446 return;
18447 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18448 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18449 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18450 return;
18451
18452 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18453 Ops.push_back(DAG.getMDNode(MDN));
18454}
18455
18456// isLegalAddressingMode - Return true if the addressing mode represented
18457// by AM is legal for this target, for a load/store of the specified type.
18459 const AddrMode &AM, Type *Ty,
18460 unsigned AS,
18461 Instruction *I) const {
18462 // The vector-type r+i form is supported since Power9 as the DQ form. We don't
18463 // check that the offset satisfies the DQ-form requirement (off % 16 == 0),
18464 // because on PowerPC the immediate form is preferred and the offset can be
18465 // adjusted to use it later in the PPCLoopInstrFormPrep pass. Also, in LSR a
18466 // single LSRUse checks its min and max offsets for a legal addressing mode, so
18467 // we should be a little aggressive here to accommodate its other offsets.
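// For example, on Power9 a v4i32 access at base+20 is still reported legal
// here even though the DQ form needs a multiple of 16; later passes can
// either rewrite the base so the displacement becomes DQ-form compatible or
// fall back to an X-form access (e.g. lxvx).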
18468 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18469 return false;
18470
18471 // PPC allows a sign-extended 16-bit immediate field.
18472 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18473 return false;
18474
18475 // No global is ever allowed as a base.
18476 if (AM.BaseGV)
18477 return false;
18478
18479 // PPC only supports r+r,
18480 switch (AM.Scale) {
18481 case 0: // "r+i" or just "i", depending on HasBaseReg.
18482 break;
18483 case 1:
18484 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18485 return false;
18486 // Otherwise we have r+r or r+i.
18487 break;
18488 case 2:
18489 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18490 return false;
18491 // Allow 2*r as r+r.
18492 break;
18493 default:
18494 // No other scales are supported.
18495 return false;
18496 }
18497
18498 return true;
18499}
18500
18501SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
18502 SelectionDAG &DAG) const {
18504 MachineFrameInfo &MFI = MF.getFrameInfo();
18505 MFI.setReturnAddressIsTaken(true);
18506
18507 SDLoc dl(Op);
18508 unsigned Depth = Op.getConstantOperandVal(0);
18509
18510 // Make sure the function does not optimize away the store of the RA to
18511 // the stack.
18512 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
18513 FuncInfo->setLRStoreRequired();
18514 auto PtrVT = getPointerTy(MF.getDataLayout());
18515
18516 if (Depth > 0) {
18517     // The link register (return address) is saved in the caller's frame,
18518     // not the callee's stack frame. So we must get the caller's frame
18519     // address and load the return address at the LR offset from there.
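    // For example (illustrative): __builtin_return_address(1) first loads the
    // caller's frame address through the back chain, then loads the saved LR
    // at getReturnSaveOffset() bytes from that frame.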
18520 SDValue FrameAddr =
18521 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18523 SDValue Offset =
18524 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
18525 Subtarget.getScalarIntVT());
18526 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
18527 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
18529 }
18530
18531 // Just load the return address off the stack.
18532 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
18533 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
18535}
18536
18537SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18538 SelectionDAG &DAG) const {
18539 SDLoc dl(Op);
18540 unsigned Depth = Op.getConstantOperandVal(0);
18541
18542 MachineFunction &MF = DAG.getMachineFunction();
18543 MachineFrameInfo &MFI = MF.getFrameInfo();
18544 MFI.setFrameAddressIsTaken(true);
18545
18546 EVT PtrVT = getPointerTy(MF.getDataLayout());
18547 bool isPPC64 = PtrVT == MVT::i64;
18548
18549   // Naked functions never have a frame pointer, and so we use r1. For all
18550   // other functions, this decision must be deferred until PEI runs.
18551 unsigned FrameReg;
18552 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
18553 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18554 else
18555 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18556
18557 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
18558 PtrVT);
18559 while (Depth--)
18560 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18561 FrameAddr, MachinePointerInfo());
18562 return FrameAddr;
18563}
18564
18565#define GET_REGISTER_MATCHER
18566#include "PPCGenAsmMatcher.inc"
18567
18569 const MachineFunction &MF) const {
18570 bool IsPPC64 = Subtarget.isPPC64();
18571
18572 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
18573 if (!Is64Bit && VT != LLT::scalar(32))
18574 report_fatal_error("Invalid register global variable type");
18575
18577 if (!Reg)
18578 return Reg;
18579
18580 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
18581 // Need followup investigation as to why.
18582 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
18583 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
18584 StringRef(RegName) + "\"."));
18585
18586 // Convert GPR to GP8R register for 64bit.
18587   // Convert GPR to GP8R register for 64-bit.
18588 Reg = Reg.id() - PPC::R0 + PPC::X0;
18589
18590 return Reg;
18591}
18592
18594   // The 32-bit SVR4 ABI accesses everything as got-indirect.
18595 if (Subtarget.is32BitELFABI())
18596 return true;
18597
18598 // AIX accesses everything indirectly through the TOC, which is similar to
18599 // the GOT.
18600 if (Subtarget.isAIXABI())
18601 return true;
18602
18604   // If the code model is small or large, module locals are accessed
18605 // indirectly by loading their address from .toc/.got.
18606 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18607 return true;
18608
18609 // JumpTable and BlockAddress are accessed as got-indirect.
18611 return true;
18612
18614 return Subtarget.isGVIndirectSymbol(G->getGlobal());
18615
18616 return false;
18617}
18618
18619bool
18621 // The PowerPC target isn't yet aware of offsets.
18622 return false;
18623}
18624
18626 const CallBase &I,
18627 MachineFunction &MF,
18628 unsigned Intrinsic) const {
18629 switch (Intrinsic) {
18630 case Intrinsic::ppc_atomicrmw_xchg_i128:
18631 case Intrinsic::ppc_atomicrmw_add_i128:
18632 case Intrinsic::ppc_atomicrmw_sub_i128:
18633 case Intrinsic::ppc_atomicrmw_nand_i128:
18634 case Intrinsic::ppc_atomicrmw_and_i128:
18635 case Intrinsic::ppc_atomicrmw_or_i128:
18636 case Intrinsic::ppc_atomicrmw_xor_i128:
18637 case Intrinsic::ppc_cmpxchg_i128:
18638 Info.opc = ISD::INTRINSIC_W_CHAIN;
18639 Info.memVT = MVT::i128;
18640 Info.ptrVal = I.getArgOperand(0);
18641 Info.offset = 0;
18642 Info.align = Align(16);
18645 return true;
18646 case Intrinsic::ppc_atomic_load_i128:
18647 Info.opc = ISD::INTRINSIC_W_CHAIN;
18648 Info.memVT = MVT::i128;
18649 Info.ptrVal = I.getArgOperand(0);
18650 Info.offset = 0;
18651 Info.align = Align(16);
18653 return true;
18654 case Intrinsic::ppc_atomic_store_i128:
18655 Info.opc = ISD::INTRINSIC_VOID;
18656 Info.memVT = MVT::i128;
18657 Info.ptrVal = I.getArgOperand(2);
18658 Info.offset = 0;
18659 Info.align = Align(16);
18661 return true;
18662 case Intrinsic::ppc_altivec_lvx:
18663 case Intrinsic::ppc_altivec_lvxl:
18664 case Intrinsic::ppc_altivec_lvebx:
18665 case Intrinsic::ppc_altivec_lvehx:
18666 case Intrinsic::ppc_altivec_lvewx:
18667 case Intrinsic::ppc_vsx_lxvd2x:
18668 case Intrinsic::ppc_vsx_lxvw4x:
18669 case Intrinsic::ppc_vsx_lxvd2x_be:
18670 case Intrinsic::ppc_vsx_lxvw4x_be:
18671 case Intrinsic::ppc_vsx_lxvl:
18672 case Intrinsic::ppc_vsx_lxvll: {
18673 EVT VT;
18674 switch (Intrinsic) {
18675 case Intrinsic::ppc_altivec_lvebx:
18676 VT = MVT::i8;
18677 break;
18678 case Intrinsic::ppc_altivec_lvehx:
18679 VT = MVT::i16;
18680 break;
18681 case Intrinsic::ppc_altivec_lvewx:
18682 VT = MVT::i32;
18683 break;
18684 case Intrinsic::ppc_vsx_lxvd2x:
18685 case Intrinsic::ppc_vsx_lxvd2x_be:
18686 VT = MVT::v2f64;
18687 break;
18688 default:
18689 VT = MVT::v4i32;
18690 break;
18691 }
18692
18693 Info.opc = ISD::INTRINSIC_W_CHAIN;
18694 Info.memVT = VT;
18695 Info.ptrVal = I.getArgOperand(0);
18696 Info.offset = -VT.getStoreSize()+1;
18697 Info.size = 2*VT.getStoreSize()-1;
18698 Info.align = Align(1);
18699 Info.flags = MachineMemOperand::MOLoad;
18700 return true;
18701 }
18702 case Intrinsic::ppc_altivec_stvx:
18703 case Intrinsic::ppc_altivec_stvxl:
18704 case Intrinsic::ppc_altivec_stvebx:
18705 case Intrinsic::ppc_altivec_stvehx:
18706 case Intrinsic::ppc_altivec_stvewx:
18707 case Intrinsic::ppc_vsx_stxvd2x:
18708 case Intrinsic::ppc_vsx_stxvw4x:
18709 case Intrinsic::ppc_vsx_stxvd2x_be:
18710 case Intrinsic::ppc_vsx_stxvw4x_be:
18711 case Intrinsic::ppc_vsx_stxvl:
18712 case Intrinsic::ppc_vsx_stxvll: {
18713 EVT VT;
18714 switch (Intrinsic) {
18715 case Intrinsic::ppc_altivec_stvebx:
18716 VT = MVT::i8;
18717 break;
18718 case Intrinsic::ppc_altivec_stvehx:
18719 VT = MVT::i16;
18720 break;
18721 case Intrinsic::ppc_altivec_stvewx:
18722 VT = MVT::i32;
18723 break;
18724 case Intrinsic::ppc_vsx_stxvd2x:
18725 case Intrinsic::ppc_vsx_stxvd2x_be:
18726 VT = MVT::v2f64;
18727 break;
18728 default:
18729 VT = MVT::v4i32;
18730 break;
18731 }
18732
18733 Info.opc = ISD::INTRINSIC_VOID;
18734 Info.memVT = VT;
18735 Info.ptrVal = I.getArgOperand(1);
18736 Info.offset = -VT.getStoreSize()+1;
18737 Info.size = 2*VT.getStoreSize()-1;
18738 Info.align = Align(1);
18739 Info.flags = MachineMemOperand::MOStore;
18740 return true;
18741 }
18742 case Intrinsic::ppc_stdcx:
18743 case Intrinsic::ppc_stwcx:
18744 case Intrinsic::ppc_sthcx:
18745 case Intrinsic::ppc_stbcx: {
18746 EVT VT;
18747 auto Alignment = Align(8);
18748 switch (Intrinsic) {
18749 case Intrinsic::ppc_stdcx:
18750 VT = MVT::i64;
18751 break;
18752 case Intrinsic::ppc_stwcx:
18753 VT = MVT::i32;
18754 Alignment = Align(4);
18755 break;
18756 case Intrinsic::ppc_sthcx:
18757 VT = MVT::i16;
18758 Alignment = Align(2);
18759 break;
18760 case Intrinsic::ppc_stbcx:
18761 VT = MVT::i8;
18762 Alignment = Align(1);
18763 break;
18764 }
18765 Info.opc = ISD::INTRINSIC_W_CHAIN;
18766 Info.memVT = VT;
18767 Info.ptrVal = I.getArgOperand(0);
18768 Info.offset = 0;
18769 Info.align = Alignment;
18771 return true;
18772 }
18773 default:
18774 break;
18775 }
18776
18777 return false;
18778}
18779
18780/// It returns EVT::Other if the type should be determined using generic
18781/// target-independent logic.
18783 LLVMContext &Context, const MemOp &Op,
18784 const AttributeList &FuncAttributes) const {
18785 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
18786 // We should use Altivec/VSX loads and stores when available. For unaligned
18787 // addresses, unaligned VSX loads are only fast starting with the P8.
18788 if (Subtarget.hasAltivec() && Op.size() >= 16) {
18789 if (Op.isMemset() && Subtarget.hasVSX()) {
18790 uint64_t TailSize = Op.size() % 16;
18791         // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
18792         // element if the vector element type matches the tail store. For tail
18793         // sizes 3/4 the tail store is i32, so v4i32 cannot be used; pick another legal type.
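        // For example (illustrative): a 20-byte memset leaves a 4-byte tail,
        // so v8i16 is returned below instead of v4i32.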
18794 if (TailSize > 2 && TailSize <= 4) {
18795 return MVT::v8i16;
18796 }
18797 return MVT::v4i32;
18798 }
18799 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
18800 return MVT::v4i32;
18801 }
18802 }
18803
18804 if (Subtarget.isPPC64()) {
18805 return MVT::i64;
18806 }
18807
18808 return MVT::i32;
18809}
18810
18811/// Returns true if it is beneficial to convert a load of a constant
18812/// to just the constant itself.
18814 Type *Ty) const {
18815 assert(Ty->isIntegerTy());
18816
18817 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18818 return !(BitSize == 0 || BitSize > 64);
18819}
18820
18822 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
18823 return false;
18824 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
18825 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
18826 return NumBits1 == 64 && NumBits2 == 32;
18827}
18828
18830 if (!VT1.isInteger() || !VT2.isInteger())
18831 return false;
18832 unsigned NumBits1 = VT1.getSizeInBits();
18833 unsigned NumBits2 = VT2.getSizeInBits();
18834 return NumBits1 == 64 && NumBits2 == 32;
18835}
18836
18838 // Generally speaking, zexts are not free, but they are free when they can be
18839 // folded with other operations.
18840 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
18841 EVT MemVT = LD->getMemoryVT();
18842 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
18843 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
18844 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
18845 LD->getExtensionType() == ISD::ZEXTLOAD))
18846 return true;
18847 }
18848
18849 // FIXME: Add other cases...
18850 // - 32-bit shifts with a zext to i64
18851 // - zext after ctlz, bswap, etc.
18852 // - zext after and by a constant mask
18853
18854 return TargetLowering::isZExtFree(Val, VT2);
18855}
18856
18857bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
18858 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
18859 "invalid fpext types");
18860 // Extending to float128 is not free.
18861 if (DestVT == MVT::f128)
18862 return false;
18863 return true;
18864}
18865
18867 return isInt<16>(Imm) || isUInt<16>(Imm);
18868}
18869
18871 return isInt<16>(Imm) || isUInt<16>(Imm);
18872}
18873
18876 unsigned *Fast) const {
18878 return false;
18879
18880 // PowerPC supports unaligned memory access for simple non-vector types.
18881 // Although accessing unaligned addresses is not as efficient as accessing
18882 // aligned addresses, it is generally more efficient than manual expansion,
18883   // and it generally traps to software emulation only when crossing page
18884   // boundaries.
18885
18886 if (!VT.isSimple())
18887 return false;
18888
18889 if (VT.isFloatingPoint() && !VT.isVector() &&
18890 !Subtarget.allowsUnalignedFPAccess())
18891 return false;
18892
18893 if (VT.getSimpleVT().isVector()) {
18894 if (Subtarget.hasVSX()) {
18895 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
18896 VT != MVT::v4f32 && VT != MVT::v4i32)
18897 return false;
18898 } else {
18899 return false;
18900 }
18901 }
18902
18903 if (VT == MVT::ppcf128)
18904 return false;
18905
18906 if (Fast)
18907 *Fast = 1;
18908
18909 return true;
18910}
18911
18913 SDValue C) const {
18914 // Check integral scalar types.
18915 if (!VT.isScalarInteger())
18916 return false;
18917 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
18918 if (!ConstNode->getAPIntValue().isSignedIntN(64))
18919 return false;
18920     // This transformation will generate >= 2 operations. But the following
18921     // cases will generate <= 2 instructions during ISEL, so exclude them:
18922     // 1. If the constant multiplier fits in 16 bits, it can be handled by a
18923     //    single HW instruction, i.e. MULLI.
18924     // 2. If the multiplier fits in 16 bits after shifting out trailing zeros,
18925     //    one extra shift is needed compared to case 1, i.e. MULLI and RLDICR.
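    // For example (illustrative): a multiplier of 0x50000 (5 << 16) shifts
    // down to 5, which fits in 16 bits, so we return false and leave it to
    // MULLI + RLDICR; 65537 (2^16 + 1) does not fit, and since 65537 - 1 is a
    // power of two we return true and let it be decomposed into shift/add.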
18926 int64_t Imm = ConstNode->getSExtValue();
18927 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
18928 Imm >>= Shift;
18929 if (isInt<16>(Imm))
18930 return false;
18931 uint64_t UImm = static_cast<uint64_t>(Imm);
18932 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
18933 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
18934 return true;
18935 }
18936 return false;
18937}
18938
18944
18946 Type *Ty) const {
18947 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
18948 return false;
18949 switch (Ty->getScalarType()->getTypeID()) {
18950 case Type::FloatTyID:
18951 case Type::DoubleTyID:
18952 return true;
18953 case Type::FP128TyID:
18954 return Subtarget.hasP9Vector();
18955 default:
18956 return false;
18957 }
18958}
18959
18960// FIXME: add more patterns which are not profitable to hoist.
18962 if (!I->hasOneUse())
18963 return true;
18964
18965 Instruction *User = I->user_back();
18966 assert(User && "A single use instruction with no uses.");
18967
18968 switch (I->getOpcode()) {
18969 case Instruction::FMul: {
18970 // Don't break FMA, PowerPC prefers FMA.
18971 if (User->getOpcode() != Instruction::FSub &&
18972 User->getOpcode() != Instruction::FAdd)
18973 return true;
18974
18976 const Function *F = I->getFunction();
18977 const DataLayout &DL = F->getDataLayout();
18978 Type *Ty = User->getOperand(0)->getType();
18979 bool AllowContract = I->getFastMathFlags().allowContract() &&
18980 User->getFastMathFlags().allowContract();
18981
18982 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
18984 (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
18985 }
18986 case Instruction::Load: {
18987     // Don't break the "store (load float*)" pattern; it will be combined
18988     // into "store (load int32)" by a later InstCombine pass. See function
18989     // combineLoadToOperationType. On PowerPC, loading a floating-point value
18990     // takes more cycles than loading a 32-bit integer.
18991 LoadInst *LI = cast<LoadInst>(I);
18992     // For loads that combineLoadToOperationType leaves alone, such as ordered
18993     // loads, it should be profitable to hoist them.
18994     // A swifterror load can only be used with pointer-to-pointer types, so the
18995     // later type check rules out this case.
18996 if (!LI->isUnordered())
18997 return true;
18998
18999 if (User->getOpcode() != Instruction::Store)
19000 return true;
19001
19002 if (I->getType()->getTypeID() != Type::FloatTyID)
19003 return true;
19004
19005 return false;
19006 }
19007 default:
19008 return true;
19009 }
19010 return true;
19011}
19012
19013const MCPhysReg *
19015 // LR is a callee-save register, but we must treat it as clobbered by any call
19016 // site. Hence we include LR in the scratch registers, which are in turn added
19017 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
19018 // to CTR, which is used by any indirect call.
19019 static const MCPhysReg ScratchRegs[] = {
19020 PPC::X12, PPC::LR8, PPC::CTR8, 0
19021 };
19022
19023 return ScratchRegs;
19024}
19025
19027 const Constant *PersonalityFn) const {
19028 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
19029}
19030
19032 const Constant *PersonalityFn) const {
19033 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
19034}
19035
19036bool
19038 EVT VT , unsigned DefinedValues) const {
19039 if (VT == MVT::v2i64)
19040 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
19041
19042 if (Subtarget.hasVSX())
19043 return true;
19044
19046}
19047
19049 if (DisableILPPref || Subtarget.enableMachineScheduler())
19051
19052 return Sched::ILP;
19053}
19054
19055// Create a fast isel object.
19057 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
19058 const LibcallLoweringInfo *LibcallLowering) const {
19059 return PPC::createFastISel(FuncInfo, LibInfo, LibcallLowering);
19060}
19061
19062// 'Inverted' means the FMA opcode after negating one multiplicand.
19063// For example, (fma -a b c) = (fnmsub a b c)
19064static unsigned invertFMAOpcode(unsigned Opc) {
19065 switch (Opc) {
19066 default:
19067 llvm_unreachable("Invalid FMA opcode for PowerPC!");
19068 case ISD::FMA:
19069 return PPCISD::FNMSUB;
19070 case PPCISD::FNMSUB:
19071 return ISD::FMA;
19072 }
19073}
19074
19076 bool LegalOps, bool OptForSize,
19078 unsigned Depth) const {
19080 return SDValue();
19081
19082 unsigned Opc = Op.getOpcode();
19083 EVT VT = Op.getValueType();
19084 SDNodeFlags Flags = Op.getNode()->getFlags();
19085
19086 switch (Opc) {
19087 case PPCISD::FNMSUB:
19088 if (!Op.hasOneUse() || !isTypeLegal(VT))
19089 break;
19090
19092 SDValue N0 = Op.getOperand(0);
19093 SDValue N1 = Op.getOperand(1);
19094 SDValue N2 = Op.getOperand(2);
19095 SDLoc Loc(Op);
19096
19098 SDValue NegN2 =
19099 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
19100
19101 if (!NegN2)
19102 return SDValue();
19103
19104 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
19105 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
19106 // These transformations may change sign of zeroes. For example,
19107 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
19108 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
19109 // Try and choose the cheaper one to negate.
19111 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
19112 N0Cost, Depth + 1);
19113
19115 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
19116 N1Cost, Depth + 1);
19117
19118 if (NegN0 && N0Cost <= N1Cost) {
19119 Cost = std::min(N0Cost, N2Cost);
19120 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
19121 } else if (NegN1) {
19122 Cost = std::min(N1Cost, N2Cost);
19123 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
19124 }
19125 }
19126
19127 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
19128 if (isOperationLegal(ISD::FMA, VT)) {
19129 Cost = N2Cost;
19130 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
19131 }
19132
19133 break;
19134 }
19135
19136 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
19137 Cost, Depth);
19138}
19139
19140// Override to enable LOAD_STACK_GUARD lowering on Linux.
19142 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
19143 return true;
19145}
19146
19148 bool ForCodeSize) const {
19149 if (!VT.isSimple() || !Subtarget.hasVSX())
19150 return false;
19151
19152 switch(VT.getSimpleVT().SimpleTy) {
19153 default:
19154 // For FP types that are currently not supported by PPC backend, return
19155 // false. Examples: f16, f80.
19156 return false;
19157 case MVT::f32:
19158 case MVT::f64: {
19159 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
19160       // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
19161 return true;
19162 }
19163 bool IsExact;
19164 APSInt IntResult(16, false);
19165 // The rounding mode doesn't really matter because we only care about floats
19166 // that can be converted to integers exactly.
19167 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
19168 // For exact values in the range [-16, 15] we can materialize the float.
19169 if (IsExact && IntResult <= 15 && IntResult >= -16)
19170 return true;
19171 return Imm.isZero();
19172 }
19173 case MVT::ppcf128:
19174 return Imm.isPosZero();
19175 }
19176}
19177
19178// For vector shift operation op, fold
19179// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
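// For example (illustrative): (srl v4i32 %x, (and %y, splat(31))) becomes
// (PPCISD::SRL %x, %y); the vector shift hardware already masks the shift
// amount to the element width, so the AND is redundant.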
19181 SelectionDAG &DAG) {
19182 SDValue N0 = N->getOperand(0);
19183 SDValue N1 = N->getOperand(1);
19184 EVT VT = N0.getValueType();
19185 unsigned OpSizeInBits = VT.getScalarSizeInBits();
19186 unsigned Opcode = N->getOpcode();
19187 unsigned TargetOpcode;
19188
19189 switch (Opcode) {
19190 default:
19191 llvm_unreachable("Unexpected shift operation");
19192 case ISD::SHL:
19193 TargetOpcode = PPCISD::SHL;
19194 break;
19195 case ISD::SRL:
19196 TargetOpcode = PPCISD::SRL;
19197 break;
19198 case ISD::SRA:
19199 TargetOpcode = PPCISD::SRA;
19200 break;
19201 }
19202
19203 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
19204 N1->getOpcode() == ISD::AND)
19205 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
19206 if (Mask->getZExtValue() == OpSizeInBits - 1)
19207 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
19208
19209 return SDValue();
19210}
19211
19212SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
19213 DAGCombinerInfo &DCI) const {
19214 EVT VT = N->getValueType(0);
19215 assert(VT.isVector() && "Vector type expected.");
19216
19217 unsigned Opc = N->getOpcode();
19218 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
19219 "Unexpected opcode.");
19220
19221 if (!isOperationLegal(Opc, VT))
19222 return SDValue();
19223
19224 EVT EltTy = VT.getScalarType();
19225 unsigned EltBits = EltTy.getSizeInBits();
19226 if (EltTy != MVT::i64 && EltTy != MVT::i32)
19227 return SDValue();
19228
19229 SDValue N1 = N->getOperand(1);
19230 uint64_t SplatBits = 0;
19231 bool AddSplatCase = false;
19232 unsigned OpcN1 = N1.getOpcode();
19233 if (OpcN1 == PPCISD::VADD_SPLAT &&
19235 AddSplatCase = true;
19236 SplatBits = N1.getConstantOperandVal(0);
19237 }
19238
19239 if (!AddSplatCase) {
19240 if (OpcN1 != ISD::BUILD_VECTOR)
19241 return SDValue();
19242
19243 unsigned SplatBitSize;
19244 bool HasAnyUndefs;
19245 APInt APSplatBits, APSplatUndef;
19246 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
19247 bool BVNIsConstantSplat =
19248 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
19249 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
19250 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
19251 return SDValue();
19252 SplatBits = APSplatBits.getZExtValue();
19253 }
19254
19255 SDLoc DL(N);
19256 SDValue N0 = N->getOperand(0);
19257   // PPC vector shifts by word/doubleword look at only the low 5/6 bits of the
19258   // shift vector, which means the max value is 31/63. A shift vector of all
19259   // 1s will be truncated to 31/63, which is useful because vspltisw is limited
19260   // to the -16 to 15 range.
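  // For example (illustrative): a v4i32 shift by splat(31) is rewritten below
  // with an all-ones splat operand; each element is -1, which fits the
  // -16..15 splat-immediate range, and the 5-bit shift field truncates it
  // back to 31.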
19261 if (SplatBits == (EltBits - 1)) {
19262 unsigned NewOpc;
19263 switch (Opc) {
19264 case ISD::SHL:
19265 NewOpc = PPCISD::SHL;
19266 break;
19267 case ISD::SRL:
19268 NewOpc = PPCISD::SRL;
19269 break;
19270 case ISD::SRA:
19271 NewOpc = PPCISD::SRA;
19272 break;
19273 }
19274 SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
19275 return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
19276 }
19277
19278 if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
19279 return SDValue();
19280
19281 // For 64-bit there is no splat immediate so we want to catch shift by 1 here
19282 // before the BUILD_VECTOR is replaced by a load.
19283 if (EltTy != MVT::i64 || SplatBits != 1)
19284 return SDValue();
19285
19286 return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
19287}
19288
19289SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
19290 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19291 return Value;
19292
19293 if (N->getValueType(0).isVector())
19294 return combineVectorShift(N, DCI);
19295
19296 SDValue N0 = N->getOperand(0);
19297 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19298 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
19299 N0.getOpcode() != ISD::SIGN_EXTEND ||
19300 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
19301 N->getValueType(0) != MVT::i64)
19302 return SDValue();
19303
19304 // We can't save an operation here if the value is already extended, and
19305 // the existing shift is easier to combine.
19306 SDValue ExtsSrc = N0.getOperand(0);
19307 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19308 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
19309 return SDValue();
19310
19311 SDLoc DL(N0);
19312 SDValue ShiftBy = SDValue(CN1, 0);
19313   // We want the shift amount to be i32 on the extswsli, but the shift amount
19314   // could be an i64.
19315 if (ShiftBy.getValueType() == MVT::i64)
19316 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
19317
19318 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
19319 ShiftBy);
19320}
19321
19322SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19323 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19324 return Value;
19325
19326 if (N->getValueType(0).isVector())
19327 return combineVectorShift(N, DCI);
19328
19329 return SDValue();
19330}
19331
19332SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19333 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19334 return Value;
19335
19336 if (N->getValueType(0).isVector())
19337 return combineVectorShift(N, DCI);
19338
19339 return SDValue();
19340}
19341
19342// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
19343// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
19344// When C is zero, the equation (addi Z, -C) can be simplified to Z
19345// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
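// For example (illustrative): on PPC64, (add %x, (zext (setne i64 %z, 5)))
// becomes a carry-based sequence (see the SETNE case below), which typically
// selects to something like: addi t, z, -5 ; addic t, t, -1 ; addze res, x
// with no separate compare-and-set of the zero-extended result.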
19347 const PPCSubtarget &Subtarget) {
19348 if (!Subtarget.isPPC64())
19349 return SDValue();
19350
19351 SDValue LHS = N->getOperand(0);
19352 SDValue RHS = N->getOperand(1);
19353
19354 auto isZextOfCompareWithConstant = [](SDValue Op) {
19355 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
19356 Op.getValueType() != MVT::i64)
19357 return false;
19358
19359 SDValue Cmp = Op.getOperand(0);
19360 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
19361 Cmp.getOperand(0).getValueType() != MVT::i64)
19362 return false;
19363
19364 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
19365 int64_t NegConstant = 0 - Constant->getSExtValue();
19366 // Due to the limitations of the addi instruction,
19367 // -C is required to be [-32768, 32767].
19368 return isInt<16>(NegConstant);
19369 }
19370
19371 return false;
19372 };
19373
19374 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
19375 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
19376
19377 // If there is a pattern, canonicalize a zext operand to the RHS.
19378 if (LHSHasPattern && !RHSHasPattern)
19379 std::swap(LHS, RHS);
19380 else if (!LHSHasPattern && !RHSHasPattern)
19381 return SDValue();
19382
19383 SDLoc DL(N);
19384 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
19385 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
19386 SDValue Cmp = RHS.getOperand(0);
19387 SDValue Z = Cmp.getOperand(0);
19388 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
19389 int64_t NegConstant = 0 - Constant->getSExtValue();
19390
19391 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
19392 default: break;
19393 case ISD::SETNE: {
19394       //                                 when C == 0
19395       //                             --> addze X, (addic Z, -1).carry
19396       //                            /
19397       // add X, (zext(setne Z, C))--
19398       //                            \    when -32768 <= -C <= 32767 && C != 0
19399       //                             --> addze X, (addic (addi Z, -C), -1).carry
19400 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19401 DAG.getConstant(NegConstant, DL, MVT::i64));
19402 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19403 SDValue Addc =
19404 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19405 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
19406 DAG.getConstant(0, DL, CarryType));
19407 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19408 DAG.getConstant(0, DL, MVT::i64),
19409 SDValue(Addc.getNode(), 1));
19410 }
19411 case ISD::SETEQ: {
19412       //                                 when C == 0
19413       //                             --> addze X, (subfic Z, 0).carry
19414       //                            /
19415       // add X, (zext(sete Z, C))--
19416       //                            \    when -32768 <= -C <= 32767 && C != 0
19417       //                             --> addze X, (subfic (addi Z, -C), 0).carry
19418 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19419 DAG.getConstant(NegConstant, DL, MVT::i64));
19420 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19421 SDValue Subc =
19422 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19423 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
19424 DAG.getConstant(0, DL, CarryType));
19425 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
19426 DAG.getConstant(1UL, DL, CarryType));
19427 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19428 DAG.getConstant(0, DL, MVT::i64), Invert);
19429 }
19430 }
19431
19432 return SDValue();
19433}
19434
19435// Transform
19436// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19437// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19438// In this case both C1 and C2 must be known constants.
19439// C1+C2 must fit into a 34 bit signed integer.
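// For example (illustrative):
//   (add (MAT_PCREL_ADDR @g + 8), 16)  -->  (MAT_PCREL_ADDR @g + 24)
// so the whole address can still be materialized by a single pc-relative
// paddi.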
19441 const PPCSubtarget &Subtarget) {
19442 if (!Subtarget.isUsingPCRelativeCalls())
19443 return SDValue();
19444
19445 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19446 // If we find that node try to cast the Global Address and the Constant.
19447 SDValue LHS = N->getOperand(0);
19448 SDValue RHS = N->getOperand(1);
19449
19450 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19451 std::swap(LHS, RHS);
19452
19453 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19454 return SDValue();
19455
19456 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19459
19460 // Check that both casts succeeded.
19461 if (!GSDN || !ConstNode)
19462 return SDValue();
19463
19464 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19465 SDLoc DL(GSDN);
19466
19467 // The signed int offset needs to fit in 34 bits.
19468 if (!isInt<34>(NewOffset))
19469 return SDValue();
19470
19471 // The new global address is a copy of the old global address except
19472 // that it has the updated Offset.
19473 SDValue GA =
19474 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
19475 NewOffset, GSDN->getTargetFlags());
19476 SDValue MatPCRel =
19477 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
19478 return MatPCRel;
19479}
19480
19481// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
19482// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
19483// Mathematical identity: X + 1 = X - (-1)
19484// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
19485// Requirement: VSX feature for efficient xxleqv generation
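// For example (illustrative): (add v4i32 %x, <1, 1, 1, 1>) is rewritten as
// (sub %x, <-1, -1, -1, -1>), where the all-ones operand comes from a single
// xxleqv instead of a constant-pool load of the <1, 1, 1, 1> vector.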
19487 const PPCSubtarget &Subtarget) {
19488
19489 EVT VT = N->getValueType(0);
19490 if (!Subtarget.hasVSX())
19491 return SDValue();
19492
19493 // Handle v2i64, v4i32, v8i16 and v16i8 types
19494 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
19495 VT == MVT::v2i64))
19496 return SDValue();
19497
19498 SDValue LHS = N->getOperand(0);
19499 SDValue RHS = N->getOperand(1);
19500
19501 // Check if RHS is BUILD_VECTOR
19502 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19503 return SDValue();
19504
19505 // Check if all the elements are 1
19506 unsigned NumOfEles = RHS.getNumOperands();
19507 for (unsigned i = 0; i < NumOfEles; ++i) {
19508 auto *CN = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
19509 if (!CN || CN->getSExtValue() != 1)
19510 return SDValue();
19511 }
19512 SDLoc DL(N);
19513
19514 SDValue MinusOne = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
19515 SmallVector<SDValue, 4> Ops(4, MinusOne);
19516 SDValue AllOnesVec = DAG.getBuildVector(MVT::v4i32, DL, Ops);
19517
19518 // Bitcast to the target vector type
19519 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, AllOnesVec);
19520
19521 return DAG.getNode(ISD::SUB, DL, VT, LHS, Bitcast);
19522}
19523
19524SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19525 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
19526 return Value;
19527
19528 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
19529 return Value;
19530
19531 if (auto Value = combineADDToSUB(N, DCI.DAG, Subtarget))
19532 return Value;
19533 return SDValue();
19534}
19535
19536// Detect TRUNCATE operations on bitcasts of float128 values.
19537 // What we are looking for here is the situation where we extract a subset
19538// of bits from a 128 bit float.
19539// This can be of two forms:
19540// 1) BITCAST of f128 feeding TRUNCATE
19541// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
19542// The reason this is required is because we do not have a legal i128 type
19543// and so we want to prevent having to store the f128 and then reload part
19544// of it.
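// For example (illustrative):
//   (trunc (srl (bitcast f128 %v to i128), 64) to i64)
// becomes an extract_vector_elt of (bitcast %v to v2i64), avoiding a
// store/reload of the f128 value.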
19545SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
19546 DAGCombinerInfo &DCI) const {
19547 // If we are using CRBits then try that first.
19548 if (Subtarget.useCRBits()) {
19549 // Check if CRBits did anything and return that if it did.
19550 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
19551 return CRTruncValue;
19552 }
19553
19554 SDLoc dl(N);
19555 SDValue Op0 = N->getOperand(0);
19556
19557 // Looking for a truncate of i128 to i64.
19558 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
19559 return SDValue();
19560
19561 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
19562
19563 // SRL feeding TRUNCATE.
19564 if (Op0.getOpcode() == ISD::SRL) {
19565 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
19566 // The right shift has to be by 64 bits.
19567 if (!ConstNode || ConstNode->getZExtValue() != 64)
19568 return SDValue();
19569
19570 // Switch the element number to extract.
19571 EltToExtract = EltToExtract ? 0 : 1;
19572 // Update Op0 past the SRL.
19573 Op0 = Op0.getOperand(0);
19574 }
19575
19576 // BITCAST feeding a TRUNCATE possibly via SRL.
19577 if (Op0.getOpcode() == ISD::BITCAST &&
19578 Op0.getValueType() == MVT::i128 &&
19579 Op0.getOperand(0).getValueType() == MVT::f128) {
19580 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
19581 return DCI.DAG.getNode(
19582 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
19583 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
19584 }
19585 return SDValue();
19586}
19587
19588SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
19589 SelectionDAG &DAG = DCI.DAG;
19590
19591 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
19592 if (!ConstOpOrElement)
19593 return SDValue();
19594
19595   // An imul is usually smaller than the alternative sequence for a legal type.
19597 isOperationLegal(ISD::MUL, N->getValueType(0)))
19598 return SDValue();
19599
19600 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
19601 switch (this->Subtarget.getCPUDirective()) {
19602 default:
19603 // TODO: enhance the condition for subtarget before pwr8
19604 return false;
19605 case PPC::DIR_PWR8:
19606       //  type       mul   add   shl
19607       //  scalar      4     1     1
19608       //  vector      7     2     2
19609 return true;
19610 case PPC::DIR_PWR9:
19611 case PPC::DIR_PWR10:
19612 case PPC::DIR_PWR11:
19614       //  type       mul   add   shl
19615       //  scalar      5     2     2
19616       //  vector      7     2     2
19617
19618       // The cycle ratios of the related operations are shown in the table
19619       // above. Because mul is 5 (scalar) / 7 (vector) and add/sub/shl are all 2
19620       // for both scalar and vector types, the 2-instruction patterns
19621       // (add/sub + shl, cost 4) are always profitable; but for the 3-instruction
19622       // pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl
19623       // cost 6, so we should only do it for vector types.
19624 return IsAddOne && IsNeg ? VT.isVector() : true;
19625 }
19626 };
19627
19628 EVT VT = N->getValueType(0);
19629 SDLoc DL(N);
19630
19631 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
19632 bool IsNeg = MulAmt.isNegative();
19633 APInt MulAmtAbs = MulAmt.abs();
19634
19635 if ((MulAmtAbs - 1).isPowerOf2()) {
19636 // (mul x, 2^N + 1) => (add (shl x, N), x)
19637 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
19638
19639 if (!IsProfitable(IsNeg, true, VT))
19640 return SDValue();
19641
19642 SDValue Op0 = N->getOperand(0);
19643 SDValue Op1 =
19644 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19645 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
19646 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
19647
19648 if (!IsNeg)
19649 return Res;
19650
19651 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
19652 } else if ((MulAmtAbs + 1).isPowerOf2()) {
19653 // (mul x, 2^N - 1) => (sub (shl x, N), x)
19654 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
19655
19656 if (!IsProfitable(IsNeg, false, VT))
19657 return SDValue();
19658
19659 SDValue Op0 = N->getOperand(0);
19660 SDValue Op1 =
19661 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19662 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
19663
19664 if (!IsNeg)
19665 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
19666 else
19667 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
19668
19669 } else {
19670 return SDValue();
19671 }
19672}
19673
19674// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
19675// in combiner since we need to check SD flags and other subtarget features.
19676SDValue PPCTargetLowering::combineFMALike(SDNode *N,
19677 DAGCombinerInfo &DCI) const {
19678 SDValue N0 = N->getOperand(0);
19679 SDValue N1 = N->getOperand(1);
19680 SDValue N2 = N->getOperand(2);
19681 SDNodeFlags Flags = N->getFlags();
19682 EVT VT = N->getValueType(0);
19683 SelectionDAG &DAG = DCI.DAG;
19684 const TargetOptions &Options = getTargetMachine().Options;
19685 unsigned Opc = N->getOpcode();
19687 bool LegalOps = !DCI.isBeforeLegalizeOps();
19688 SDLoc Loc(N);
19689
19690 if (!isOperationLegal(ISD::FMA, VT))
19691 return SDValue();
19692
19693 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
19694 // since (fnmsub a b c)=-0 while c-ab=+0.
19695 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
19696 return SDValue();
19697
19698 // (fma (fneg a) b c) => (fnmsub a b c)
19699 // (fnmsub (fneg a) b c) => (fma a b c)
19700 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
19701 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
19702
19703 // (fma a (fneg b) c) => (fnmsub a b c)
19704 // (fnmsub a (fneg b) c) => (fma a b c)
19705 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
19706 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
19707
19708 return SDValue();
19709}
19710
19711bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19712   // Only duplicate to increase tail calls for the 64-bit SysV ABIs.
19713 if (!Subtarget.is64BitELFABI())
19714 return false;
19715
19716 // If not a tail call then no need to proceed.
19717 if (!CI->isTailCall())
19718 return false;
19719
19720 // If sibling calls have been disabled and tail-calls aren't guaranteed
19721 // there is no reason to duplicate.
19722 auto &TM = getTargetMachine();
19723 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19724 return false;
19725
19726 // Can't tail call a function called indirectly, or if it has variadic args.
19727 const Function *Callee = CI->getCalledFunction();
19728 if (!Callee || Callee->isVarArg())
19729 return false;
19730
19731 // Make sure the callee and caller calling conventions are eligible for tco.
19732 const Function *Caller = CI->getParent()->getParent();
19733 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
19734 CI->getCallingConv()))
19735 return false;
19736
19737 // If the function is local then we have a good chance at tail-calling it
19738 return getTargetMachine().shouldAssumeDSOLocal(Callee);
19739}
19740
19741bool PPCTargetLowering::
19742isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19743 const Value *Mask = AndI.getOperand(1);
19744 // If the mask is suitable for andi. or andis. we should sink the and.
19745 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
19746 // Can't handle constants wider than 64-bits.
19747 if (CI->getBitWidth() > 64)
19748 return false;
19749 int64_t ConstVal = CI->getZExtValue();
19750 return isUInt<16>(ConstVal) ||
19751 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
19752 }
19753
19754 // For non-constant masks, we can always use the record-form and.
19755 return true;
19756}
19757
19758/// getAddrModeForFlags - Based on the set of address flags, select the most
19759/// optimal instruction format to match by.
19760PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
19761 // This is not a node we should be handling here.
19762 if (Flags == PPC::MOF_None)
19763 return PPC::AM_None;
19764 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
19765 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
19766 if ((Flags & FlagSet) == FlagSet)
19767 return PPC::AM_DForm;
19768 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
19769 if ((Flags & FlagSet) == FlagSet)
19770 return PPC::AM_DSForm;
19771 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
19772 if ((Flags & FlagSet) == FlagSet)
19773 return PPC::AM_DQForm;
19774 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
19775 if ((Flags & FlagSet) == FlagSet)
19776 return PPC::AM_PrefixDForm;
19777 // If no other forms are selected, return an X-Form as it is the most
19778 // general addressing mode.
19779 return PPC::AM_XForm;
19780}
19781
19782/// Set alignment flags based on whether or not the Frame Index is aligned.
19783/// Utilized when computing flags for address computation when selecting
19784/// load and store instructions.
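/// For example (illustrative): for an (add $FI, imm) address whose frame
/// object is only 8-byte aligned, MOF_RPlusSImm16Mult4 set from the immediate
/// is kept, but MOF_RPlusSImm16Mult16 is cleared below because the object's
/// alignment cannot guarantee a multiple of 16.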
19785static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
19786 SelectionDAG &DAG) {
19787 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
19788 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
19789 if (!FI)
19790 return;
19792 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
19793 // If this is (add $FI, $S16Imm), the alignment flags are already set
19794 // based on the immediate. We just need to clear the alignment flags
19795 // if the FI alignment is weaker.
19796 if ((FrameIndexAlign % 4) != 0)
19797 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
19798 if ((FrameIndexAlign % 16) != 0)
19799 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
19800 // If the address is a plain FrameIndex, set alignment flags based on
19801 // FI alignment.
19802 if (!IsAdd) {
19803 if ((FrameIndexAlign % 4) == 0)
19804 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19805 if ((FrameIndexAlign % 16) == 0)
19806 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19807 }
19808}
19809
19810/// Given a node, compute flags that are used for address computation when
19811/// selecting load and store instructions. The flags computed are stored in
19812 /// FlagSet. This function takes into account whether the node is a constant,
19813 /// an ADD, an OR, or none of these, and computes the address flags accordingly.
19814static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
19815 SelectionDAG &DAG) {
19816 // Set the alignment flags for the node depending on if the node is
19817 // 4-byte or 16-byte aligned.
19818 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
19819 if ((Imm & 0x3) == 0)
19820 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19821 if ((Imm & 0xf) == 0)
19822 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19823 };
19824
19826 // All 32-bit constants can be computed as LIS + Disp.
19827 const APInt &ConstImm = CN->getAPIntValue();
19828 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
19829 FlagSet |= PPC::MOF_AddrIsSImm32;
19830 SetAlignFlagsForImm(ConstImm.getZExtValue());
19831 setAlignFlagsForFI(N, FlagSet, DAG);
19832 }
19833 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
19834 FlagSet |= PPC::MOF_RPlusSImm34;
19835 else // Let constant materialization handle large constants.
19836 FlagSet |= PPC::MOF_NotAddNorCst;
19837 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
19838 // This address can be represented as an addition of:
19839 // - Register + Imm16 (possibly a multiple of 4/16)
19840 // - Register + Imm34
19841 // - Register + PPCISD::Lo
19842 // - Register + Register
19843 // In any case, we won't have to match this as Base + Zero.
19844 SDValue RHS = N.getOperand(1);
19846 const APInt &ConstImm = CN->getAPIntValue();
19847 if (ConstImm.isSignedIntN(16)) {
19848 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
19849 SetAlignFlagsForImm(ConstImm.getZExtValue());
19850 setAlignFlagsForFI(N, FlagSet, DAG);
19851 }
19852 if (ConstImm.isSignedIntN(34))
19853 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
19854 else
19855 FlagSet |= PPC::MOF_RPlusR; // Register.
19856 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
19857 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
19858 else
19859 FlagSet |= PPC::MOF_RPlusR;
19860 } else { // The address computation is not a constant or an addition.
19861 setAlignFlagsForFI(N, FlagSet, DAG);
19862 FlagSet |= PPC::MOF_NotAddNorCst;
19863 }
19864}
19865
19866static bool isPCRelNode(SDValue N) {
19867 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
19872}
19873
19874 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
19875/// the address flags of the load/store instruction that is to be matched.
19876unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
19877 SelectionDAG &DAG) const {
19878 unsigned FlagSet = PPC::MOF_None;
19879
19880 // Compute subtarget flags.
19881 if (!Subtarget.hasP9Vector())
19882 FlagSet |= PPC::MOF_SubtargetBeforeP9;
19883 else
19884 FlagSet |= PPC::MOF_SubtargetP9;
19885
19886 if (Subtarget.hasPrefixInstrs())
19887 FlagSet |= PPC::MOF_SubtargetP10;
19888
19889 if (Subtarget.hasSPE())
19890 FlagSet |= PPC::MOF_SubtargetSPE;
19891
19892 // Check if we have a PCRel node and return early.
19893 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
19894 return FlagSet;
19895
19896   // If the node is one of the paired load/store intrinsics, compute flags for
19897 // address computation and return early.
19898 unsigned ParentOp = Parent->getOpcode();
19899 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
19900 (ParentOp == ISD::INTRINSIC_VOID))) {
19901 unsigned ID = Parent->getConstantOperandVal(1);
19902 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
19903 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
19904 ? Parent->getOperand(2)
19905 : Parent->getOperand(3);
19906 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
19907 FlagSet |= PPC::MOF_Vector;
19908 return FlagSet;
19909 }
19910 }
19911
19912 // Mark this as something we don't want to handle here if it is atomic
19913 // or pre-increment instruction.
19914 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
19915 if (LSB->isIndexed())
19916 return PPC::MOF_None;
19917
19918 // Compute in-memory type flags. This is based on if there are scalars,
19919 // floats or vectors.
19920 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
19921 assert(MN && "Parent should be a MemSDNode!");
19922 EVT MemVT = MN->getMemoryVT();
19923 unsigned Size = MemVT.getSizeInBits();
19924 if (MemVT.isScalarInteger()) {
19925 assert(Size <= 128 &&
19926 "Not expecting scalar integers larger than 16 bytes!");
19927 if (Size < 32)
19928 FlagSet |= PPC::MOF_SubWordInt;
19929 else if (Size == 32)
19930 FlagSet |= PPC::MOF_WordInt;
19931 else
19932 FlagSet |= PPC::MOF_DoubleWordInt;
19933 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
19934 if (Size == 128)
19935 FlagSet |= PPC::MOF_Vector;
19936 else if (Size == 256) {
19937 assert(Subtarget.pairedVectorMemops() &&
19938 "256-bit vectors are only available when paired vector memops is "
19939 "enabled!");
19940 FlagSet |= PPC::MOF_Vector;
19941 } else
19942 llvm_unreachable("Not expecting illegal vectors!");
19943 } else { // Floating point type: can be scalar, f128 or vector types.
19944 if (Size == 32 || Size == 64)
19945 FlagSet |= PPC::MOF_ScalarFloat;
19946 else if (MemVT == MVT::f128 || MemVT.isVector())
19947 FlagSet |= PPC::MOF_Vector;
19948 else
19949 llvm_unreachable("Not expecting illegal scalar floats!");
19950 }
19951
19952 // Compute flags for address computation.
19953 computeFlagsForAddressComputation(N, FlagSet, DAG);
19954
19955 // Compute type extension flags.
19956 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
19957 switch (LN->getExtensionType()) {
19958 case ISD::SEXTLOAD:
19959 FlagSet |= PPC::MOF_SExt;
19960 break;
19961 case ISD::EXTLOAD:
19962 case ISD::ZEXTLOAD:
19963 FlagSet |= PPC::MOF_ZExt;
19964 break;
19965 case ISD::NON_EXTLOAD:
19966 FlagSet |= PPC::MOF_NoExt;
19967 break;
19968 }
19969 } else
19970 FlagSet |= PPC::MOF_NoExt;
19971
19972 // For integers, no extension is the same as zero extension.
19973 // We set the extension mode to zero extension so we don't have
19974 // to add separate entries in AddrModesMap for loads and stores.
19975 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
19976 FlagSet |= PPC::MOF_ZExt;
19977 FlagSet &= ~PPC::MOF_NoExt;
19978 }
19979
19980 // If we don't have prefixed instructions, 34-bit constants should be
19981 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
19982 bool IsNonP1034BitConst =
19984 FlagSet) == PPC::MOF_RPlusSImm34;
19985 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
19986 IsNonP1034BitConst)
19987 FlagSet |= PPC::MOF_NotAddNorCst;
19988
19989 return FlagSet;
19990}
19991
19992/// SelectForceXFormMode - Given the specified address, force it to be
19993/// represented as an indexed [r+r] operation (an XForm instruction).
19995 SDValue &Base,
19996 SelectionDAG &DAG) const {
19997
19999 int16_t ForceXFormImm = 0;
20000 if (provablyDisjointOr(DAG, N) &&
20001 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
20002 Disp = N.getOperand(0);
20003 Base = N.getOperand(1);
20004 return Mode;
20005 }
20006
20007 // If the address is the result of an add, we will utilize the fact that the
20008 // address calculation includes an implicit add. However, we can reduce
20009 // register pressure if we do not materialize a constant just for use as the
20010   // index register. We only fold the add away if it is not an add of a value
20011   // and a 16-bit signed constant where both operands have a single use.
20012 if (N.getOpcode() == ISD::ADD &&
20013 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
20014 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
20015 Disp = N.getOperand(0);
20016 Base = N.getOperand(1);
20017 return Mode;
20018 }
20019
20020 // Otherwise, use R0 as the base register.
20021 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20022 N.getValueType());
20023 Base = N;
20024
20025 return Mode;
20026}
20027
20029 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20030 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20031 EVT ValVT = Val.getValueType();
20032 // If we are splitting a scalar integer into f64 parts (i.e. so they
20033 // can be placed into VFRC registers), we need to zero extend and
20034 // bitcast the values. This will ensure the value is placed into a
20035 // VSR using direct moves or stack operations as needed.
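  // For example (illustrative): an i32 value bound for an f64 part is
  // zero-extended to i64 and then bitcast to f64, so its bit pattern is
  // carried into the VSR unchanged.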
20036 if (PartVT == MVT::f64 &&
20037 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
20038 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
20039 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
20040 Parts[0] = Val;
20041 return true;
20042 }
20043 return false;
20044}
20045
20046SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
20047 SelectionDAG &DAG) const {
20048 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20050 EVT RetVT = Op.getValueType();
20051 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
20052 SDValue Callee =
20053 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
20054 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
20056 for (const SDValue &N : Op->op_values()) {
20057 EVT ArgVT = N.getValueType();
20058 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
20059 TargetLowering::ArgListEntry Entry(N, ArgTy);
20060 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
20061 Entry.IsZExt = !Entry.IsSExt;
20062 Args.push_back(Entry);
20063 }
20064
20065 SDValue InChain = DAG.getEntryNode();
20066 SDValue TCChain = InChain;
20067 const Function &F = DAG.getMachineFunction().getFunction();
20068 bool isTailCall =
20069 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
20070 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
20071 if (isTailCall)
20072 InChain = TCChain;
20073 CLI.setDebugLoc(SDLoc(Op))
20074 .setChain(InChain)
20075 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
20076 .setTailCall(isTailCall)
20077 .setSExtResult(SignExtend)
20078 .setZExtResult(!SignExtend)
20080 return TLI.LowerCallTo(CLI).first;
20081}
20082
20083SDValue PPCTargetLowering::lowerLibCallBasedOnType(
20084 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
20085 SelectionDAG &DAG) const {
20086 if (Op.getValueType() == MVT::f32)
20087 return lowerToLibCall(LibCallFloatName, Op, DAG);
20088
20089 if (Op.getValueType() == MVT::f64)
20090 return lowerToLibCall(LibCallDoubleName, Op, DAG);
20091
20092 return SDValue();
20093}
20094
20095bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
20096 SDNodeFlags Flags = Op.getNode()->getFlags();
20097 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
20098 Flags.hasNoNaNs() && Flags.hasNoInfs();
20099}
20100
20101bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
20102 return Op.getNode()->getFlags().hasApproximateFuncs();
20103}
20104
20105bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
20107}
20108
20109SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
20110 const char *LibCallFloatName,
20111 const char *LibCallDoubleNameFinite,
20112 const char *LibCallFloatNameFinite,
20113 SDValue Op,
20114 SelectionDAG &DAG) const {
20115 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
20116 return SDValue();
20117
20118 if (!isLowringToMASSFiniteSafe(Op))
20119 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
20120 DAG);
20121
20122 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
20123 LibCallDoubleNameFinite, Op, DAG);
20124}
20125
20126SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
20127 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
20128 "__xl_powf_finite", Op, DAG);
20129}
20130
20131SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
20132 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
20133 "__xl_sinf_finite", Op, DAG);
20134}
20135
20136SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
20137 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
20138 "__xl_cosf_finite", Op, DAG);
20139}
20140
20141SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
20142 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
20143 "__xl_logf_finite", Op, DAG);
20144}
20145
20146SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
20147 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
20148 "__xl_log10f_finite", Op, DAG);
20149}
20150
20151SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
20152 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
20153 "__xl_expf_finite", Op, DAG);
20154}
20155
20156// If we happen to match to an aligned D-Form, check if the Frame Index is
20157// adequately aligned. If it is not, reset the mode to match to X-Form.
 20158 static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
 20159                                    PPC::AddrMode &Mode) {
 20160   if (!isa<FrameIndexSDNode>(N))
 20161     return;
 20162   if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
 20163       (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
 20164     Mode = PPC::AM_XForm;
 20165 }
20166
 20167 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
20168/// compute the address flags of the node, get the optimal address mode based
20169/// on the flags, and set the Base and Disp based on the address mode.
 20170 PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
 20171 SDValue N, SDValue &Disp,
20172 SDValue &Base,
20173 SelectionDAG &DAG,
20174 MaybeAlign Align) const {
20175 SDLoc DL(Parent);
20176
20177 // Compute the address flags.
20178 unsigned Flags = computeMOFlags(Parent, N, DAG);
20179
20180 // Get the optimal address mode based on the Flags.
20181 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
20182
20183 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
20184 // Select an X-Form load if it is not.
20185 setXFormForUnalignedFI(N, Flags, Mode);
20186
20187 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
20188 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
20189 assert(Subtarget.isUsingPCRelativeCalls() &&
20190 "Must be using PC-Relative calls when a valid PC-Relative node is "
20191 "present!");
20192 Mode = PPC::AM_PCRel;
20193 }
20194
20195 // Set Base and Disp accordingly depending on the address mode.
20196 switch (Mode) {
20197 case PPC::AM_DForm:
20198 case PPC::AM_DSForm:
20199 case PPC::AM_DQForm: {
20200 // This is a register plus a 16-bit immediate. The base will be the
20201 // register and the displacement will be the immediate unless it
20202 // isn't sufficiently aligned.
20203 if (Flags & PPC::MOF_RPlusSImm16) {
20204 SDValue Op0 = N.getOperand(0);
20205 SDValue Op1 = N.getOperand(1);
20206 int16_t Imm = Op1->getAsZExtVal();
20207 if (!Align || isAligned(*Align, Imm)) {
20208 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
20209 Base = Op0;
 20210 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
 20211 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20212 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20213 }
20214 break;
20215 }
20216 }
20217 // This is a register plus the @lo relocation. The base is the register
20218 // and the displacement is the global address.
20219 else if (Flags & PPC::MOF_RPlusLo) {
20220 Disp = N.getOperand(1).getOperand(0); // The global address.
20225 Base = N.getOperand(0);
20226 break;
20227 }
20228 // This is a constant address at most 32 bits. The base will be
20229 // zero or load-immediate-shifted and the displacement will be
20230 // the low 16 bits of the address.
20231 else if (Flags & PPC::MOF_AddrIsSImm32) {
20232 auto *CN = cast<ConstantSDNode>(N);
20233 EVT CNType = CN->getValueType(0);
20234 uint64_t CNImm = CN->getZExtValue();
20235 // If this address fits entirely in a 16-bit sext immediate field, codegen
20236 // this as "d, 0".
20237 int16_t Imm;
20238 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
20239 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
20240 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20241 CNType);
20242 break;
20243 }
20244 // Handle 32-bit sext immediate with LIS + Addr mode.
20245 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
20246 (!Align || isAligned(*Align, CNImm))) {
20247 int32_t Addr = (int32_t)CNImm;
20248 // Otherwise, break this down into LIS + Disp.
20249 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
20250 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
20251 MVT::i32);
20252 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
20253 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
20254 break;
20255 }
20256 }
20257 // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
20258 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
 20259 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
 20260 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20261 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20262 } else
20263 Base = N;
20264 break;
20265 }
20266 case PPC::AM_PrefixDForm: {
20267 int64_t Imm34 = 0;
20268 unsigned Opcode = N.getOpcode();
20269 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
20270 (isIntS34Immediate(N.getOperand(1), Imm34))) {
 20271 // N is an Add/OR node, and its operand is a 34-bit signed immediate.
20272 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20273 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
20274 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20275 else
20276 Base = N.getOperand(0);
20277 } else if (isIntS34Immediate(N, Imm34)) {
20278 // The address is a 34-bit signed immediate.
20279 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20280 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
20281 }
20282 break;
20283 }
20284 case PPC::AM_PCRel: {
20285 // When selecting PC-Relative instructions, "Base" is not utilized as
20286 // we select the address as [PC+imm].
20287 Disp = N;
20288 break;
20289 }
20290 case PPC::AM_None:
20291 break;
20292 default: { // By default, X-Form is always available to be selected.
20293 // When a frame index is not aligned, we also match by XForm.
 20294 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
 20295 Base = FI ? N : N.getOperand(1);
20296 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20297 N.getValueType())
20298 : N.getOperand(0);
20299 break;
20300 }
20301 }
20302 return Mode;
20303}
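// As an example of the selection above: a load whose address is
// (add %X, 12) with no extra alignment requirement yields AM_DForm with
// Base = %X and Disp = 12, while a DS/DQ-Form candidate whose frame index is
// not adequately aligned is forced back to AM_XForm and gets a register pair
// in Base/Disp.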
20304
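// Calling-convention selection for 64-bit ELF: cold calls use the dedicated
// RetCC_PPC_Cold convention for their return values, while argument passing
// and every other convention use CC_PPC64_ELF.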
 20305 CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
 20306 bool Return,
20307 bool IsVarArg) const {
20308 switch (CC) {
20309 case CallingConv::Cold:
20310 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20311 default:
20312 return CC_PPC64_ELF;
20313 }
20314}
20315
 20316 bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
 20317 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20318}
20319
 20320 TargetLowering::AtomicExpansionKind
 20321 PPCTargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const {
 20322 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
 20323 if (shouldInlineQuadwordAtomics() && Size == 128)
 20324 return AtomicExpansionKind::MaskedIntrinsic;
 20325
 20326 switch (AI->getOperation()) {
 20332 default:
 20333 return TargetLowering::shouldExpandAtomicRMWInIR(AI);
 20334 }
20335
20336 llvm_unreachable("unreachable atomicrmw operation");
20337}
20338
20347
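// Map an atomicrmw binary operation onto the corresponding quadword (i128)
// PowerPC intrinsic; operations without an i128 intrinsic fall into the
// llvm_unreachable default case.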
20348static Intrinsic::ID
20350 switch (BinOp) {
20351 default:
20352 llvm_unreachable("Unexpected AtomicRMW BinOp");
 20353 case AtomicRMWInst::Xchg:
 20354 return Intrinsic::ppc_atomicrmw_xchg_i128;
20355 case AtomicRMWInst::Add:
20356 return Intrinsic::ppc_atomicrmw_add_i128;
20357 case AtomicRMWInst::Sub:
20358 return Intrinsic::ppc_atomicrmw_sub_i128;
20359 case AtomicRMWInst::And:
20360 return Intrinsic::ppc_atomicrmw_and_i128;
20361 case AtomicRMWInst::Or:
20362 return Intrinsic::ppc_atomicrmw_or_i128;
20363 case AtomicRMWInst::Xor:
20364 return Intrinsic::ppc_atomicrmw_xor_i128;
 20365 case AtomicRMWInst::Nand:
 20366 return Intrinsic::ppc_atomicrmw_nand_i128;
20367 }
20368}
20369
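// Quadword atomicrmw lowering: the 128-bit increment is split into two i64
// halves, handed to the matching ppc_atomicrmw_*_i128 intrinsic, and the
// {lo, hi} result is reassembled into an i128 value with zext/shl/or.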
 20370 Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
 20371 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20372 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20373 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20374 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20375 Type *ValTy = Incr->getType();
20376 assert(ValTy->getPrimitiveSizeInBits() == 128);
20377 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20378 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
20379 Value *IncrHi =
20380 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
20381 Value *LoHi = Builder.CreateIntrinsic(
 20382 getIntrinsicForAtomicRMWBinOp128(AI->getOperation()), {},
 20383 {AlignedAddr, IncrLo, IncrHi});
20384 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20385 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20386 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20387 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20388 return Builder.CreateOr(
20389 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20390}
20391
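// Quadword cmpxchg lowering: the expected and new values are split into i64
// halves, ppc_cmpxchg_i128 is called between the leading and trailing
// fences, and the {lo, hi} result is recombined into an i128 value.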
 20392 Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
 20393 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20394 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20395 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20396 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20397 Type *ValTy = CmpVal->getType();
20398 assert(ValTy->getPrimitiveSizeInBits() == 128);
20399 Function *IntCmpXchg =
20400 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
20401 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20402 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
20403 Value *CmpHi =
20404 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
20405 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
20406 Value *NewHi =
20407 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
20408 emitLeadingFence(Builder, CI, Ord);
20409 Value *LoHi =
20410 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
20411 emitTrailingFence(Builder, CI, Ord);
20412 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20413 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20414 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20415 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20416 return Builder.CreateOr(
20417 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20418}
20419
20421 return Subtarget.useCRBits();
20422}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static bool isConstantOrUndef(const SDValue Op)
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
std::tuple< uint32_t, uint8_t > LXVKQPattern
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS)
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static cl::opt< unsigned > PPCMinimumBitTestCmps("ppc-min-bit-test-cmps", cl::init(3), cl::Hidden, cl::desc("Set minimum of largest number of comparisons to use bit test for " "switch on PPC."))
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, SelectionDAG &DAG)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static std::optional< LXVKQPattern > getPatternInfo(const APInt &FullVal)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static bool canConvertSETCCToXori(SDNode *N)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N, const SDLoc &DL)
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5975
bool isDenormal() const
Definition APFloat.h:1513
APInt bitcastToAPInt() const
Definition APFloat.h:1416
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1415
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1549
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1339
APInt abs() const
Get the absolute value.
Definition APInt.h:1804
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1405
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1731
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:282
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:214
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:712
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:362
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:764
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:776
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:709
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
arg_iterator arg_begin()
Definition Function.h:872
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
size_t arg_size() const
Definition Function.h:905
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
const Argument * const_arg_iterator
Definition Function.h:73
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:651
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:192
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:133
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
Tracks which library functions to use for a particular subtarget.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1080
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
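A hedged usage sketch, not code from this file: the helper name buildIsEqual and the hard-coded i1 result type are assumptions for illustration (a real caller would normally take the result type from getSetCCResultType).

  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  // Hypothetical helper: build the i1 comparison LHS == RHS.
  static SDValue buildIsEqual(SelectionDAG &DAG, const SDLoc &DL,
                              SDValue LHS, SDValue RHS) {
    return DAG.getSetCC(DL, MVT::i1, LHS, RHS, ISD::SETEQ);
  }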
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
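A minimal sketch of the equivalence stated above; the wrapper name buildNot is hypothetical, and DAG, DL, Val, and VT are assumed to be supplied by the caller.

  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  // getNOT is documented as (XOR Val, -1), so both values below have the same shape.
  static SDValue buildNot(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, EVT VT) {
    SDValue ViaHelper = DAG.getNOT(DL, Val, VT);
    SDValue ByHand = DAG.getNode(ISD::XOR, DL, VT, Val,
                                 DAG.getAllOnesConstant(DL, VT));
    (void)ByHand; // equivalent to ViaHelper
    return ViaHelper;
  }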
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node that starts a new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
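A sketch of what the splat form expands to, assuming a fixed-width vector type VT whose element type matches Op; the helper name buildSplat is hypothetical.

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  // Make element-count copies of Op and wrap them in a BUILD_VECTOR,
  // which is what getSplatBuildVector produces.
  static SDValue buildSplat(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op) {
    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Op);
    return DAG.getBuildVector(VT, DL, Ops);
  }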
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
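A hedged usage sketch; the 16-byte offset and the helper name addrOfSecondHalf are arbitrary illustration, not taken from this file.

  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/Support/TypeSize.h"
  using namespace llvm;

  // Advance Ptr by a fixed 16 bytes, e.g. to address the upper half of a 32-byte object.
  static SDValue addrOfSecondHalf(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr) {
    return DAG.getObjectPtrOffset(DL, Ptr, TypeSize::getFixed(16));
  }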
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
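A self-contained sketch of the insert/count semantics listed above; the pointers A and B are placeholders.

  #include "llvm/ADT/SmallPtrSet.h"
  #include <cassert>

  int main() {
    int A = 0, B = 0;
    llvm::SmallPtrSet<int *, 4> Visited;
    assert(Visited.insert(&A).second);  // first insertion succeeds
    assert(!Visited.insert(&A).second); // duplicate is rejected
    assert(Visited.count(&A) == 1 && Visited.count(&B) == 0);
    assert(Visited.size() == 1);
    return 0;
  }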
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
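The usual call pattern, sketched below, appears inside a TargetLowering-derived constructor; the opcode/type/action triples are illustrative assumptions only, not a statement of what this target actually configures.

  // Fragment meant to sit inside a TargetLowering subclass constructor,
  // where the LegalizeAction enumerators are visible unqualified.
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);      // lower into other operations
  setOperationAction(ISD::BSWAP, MVT::i32, Legal);       // natively supported
  setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);  // routed through LowerOperation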
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void setMinimumBitTestCmps(unsigned Val)
Set the minimum of the largest number of comparisons to generate a bit test.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
bool getFunctionSections() const
Return true if functions should be emitted into their own section, corresponding to -ffunction-sectio...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:180
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:61
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:258
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ TargetConstantPool
Definition ISDOpcodes.h:189
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, a pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SET_ROUNDING
Set rounding mode.
Definition ISDOpcodes.h:975
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ TargetExternalSymbol
Definition ISDOpcodes.h:190
@ BR
Control flow instructions. These all have token chains.
@ TargetJumpTable
Definition ISDOpcodes.h:188
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:185
@ GET_ROUNDING
Returns the current rounding mode: -1 = Undefined, 0 = Round to 0, 1 = Round to nearest (ties to even), 2 = Round to ...
Definition ISDOpcodes.h:970
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ INLINEASM
INLINEASM - Represents an inline asm block.
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:162
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:186
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:139
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - On AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:200
@ MO_TPREL_HA
Definition PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:113
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:140
@ MO_TPREL_LO
Definition PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition PPC.h:160
@ MO_HA
Definition PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:117
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
Define some predicates that are used for node matching.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering)
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Invariant opcodes: All instruction sets have these as their low opcodes.
@ XMC_PR
Program Code.
Definition XCOFF.h:106
@ XTY_ER
External reference.
Definition XCOFF.h:242
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
iterator end() const
Definition BasicBlock.h:89
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
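A self-contained illustration of this bit-width predicate family (isUInt appears further down in this listing); the values are arbitrary.

  #include "llvm/Support/MathExtras.h"

  static_assert(llvm::isInt<16>(32767), "largest signed 16-bit value fits");
  static_assert(!llvm::isInt<16>(32768), "one past the largest does not");
  static_assert(llvm::isUInt<16>(65535) && !llvm::isUInt<16>(65536),
                "the unsigned variant behaves analogously");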
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
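A self-contained illustration of the power-of-two predicates; the values are arbitrary.

  #include "llvm/Support/MathExtras.h"

  static_assert(llvm::isPowerOf2_64(1) && llvm::isPowerOf2_64(1ULL << 40),
                "exact powers of two pass");
  static_assert(!llvm::isPowerOf2_64(0) && !llvm::isPowerOf2_64(96),
                "zero and non-powers do not");
  static_assert(llvm::isPowerOf2_32(64u) && !llvm::isPowerOf2_32(0u),
                "the 32-bit edition behaves the same");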
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
unsigned M1(unsigned Val)
Definition VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
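These predicates, together with has_single_bit above, answer the same question at different widths; for example:
  #include "llvm/ADT/bit.h"
  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  static_assert(isPowerOf2_32(64u));
  static_assert(!isPowerOf2_32(0u));      // zero is not a power of two
  static_assert(isPowerOf2_64(1ULL << 40));
  static_assert(has_single_bit(0x80u));   // generic, type-templated form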
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr RegState getDefRegState(bool B)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
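Together with Hi_32 above, this is the standard way a 64-bit immediate is split when it has to be materialized in 32-bit pieces; for example:
  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  static_assert(Hi_32(0x12345678ABCDEF00ULL) == 0x12345678u);
  static_assert(Lo_32(0x12345678ABCDEF00ULL) == 0xABCDEF00u);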
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
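A small sketch of how these Alignment.h helpers fit together (stack-slot style arithmetic; the numbers are illustrative):
  #include "llvm/Support/Alignment.h"
  using namespace llvm;

  static void alignmentExamples() {
    Align A(16);                          // must be a non-zero power of two
    uint64_t Off = alignTo(20, A);        // 32: next multiple of 16 at/above 20
    bool Ok = isAligned(A, Off);          // true
    Align Common = commonAlignment(A, 8); // Align(8): alignment known at A + 8
    (void)Ok; (void)Common;
  }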
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2002
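The range wrappers above read naturally in lowering code; a minimal illustration over a SmallVector:
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  using namespace llvm;

  static void rangeHelperExamples() {
    SmallVector<int, 4> Vals = {4, 8, 12, 8};
    bool AllEven = all_of(Vals, [](int V) { return V % 2 == 0; }); // true
    bool AnyBig = any_of(Vals, [](int V) { return V > 10; });      // true
    auto Eights = count(Vals, 8);                                  // 2
    (void)AllEven; (void)AnyBig; (void)Eights;
  }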
DWARFExpression::Operation Op
bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N-bit number shifted left by S.
Definition MathExtras.h:182
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
constexpr unsigned BitWidth
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
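Both sign-extension helpers (SignExtend32 appears just above) interpret the low B bits of the input as a signed value; for example:
  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  static_assert(SignExtend32<16>(0xFFFFu) == -1);
  static_assert(SignExtend32<16>(0x7FFFu) == 32767);
  static_assert(SignExtend64<32>(0x80000000ULL) == -2147483648LL);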
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
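A hedged usage sketch: this helper is how the backend recognizes masks that a rlwinm-style rotate-and-mask instruction can produce, with MB/ME reported as mask-begin and mask-end bit positions (the exact bit-numbering convention is an assumption here; only the call shape comes from the declaration above):
  // 0x00FF0000 is a single contiguous run of ones, so the check succeeds and
  // MB/ME describe where the run begins and ends.
  unsigned MB, ME;
  if (isRunOfOnes(0x00FF0000u, MB, ME)) {
    // ... emit a mask-based rotate using MB and ME ...
  }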
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:198
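Both shifted-immediate predicates (isShiftedInt appears earlier in this list) require the low S bits to be zero and the remaining value to fit in N bits; for example:
  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  static_assert(isShiftedInt<8, 2>(508));    // 127 << 2: fits, low bits clear
  static_assert(!isShiftedInt<8, 2>(510));   // not a multiple of 4
  static_assert(isShiftedUInt<8, 2>(1020));  // 255 << 2
  static_assert(!isShiftedUInt<8, 2>(1024)); // 256 does not fit in 8 bits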
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
LLVM_ABI std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
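A short sketch of how these EVT queries compose, assuming an LLVMContext &Ctx is available; the values in the comments are for the v4i32 example:
  #include "llvm/CodeGen/ValueTypes.h"
  using namespace llvm;

  static void evtExample(LLVMContext &Ctx) {
    EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 4);   // v4i32
    bool Vec = VT.isVector();                      // true
    unsigned NumElts = VT.getVectorNumElements();  // 4
    uint64_t EltBits = VT.getScalarSizeInBits();   // 32
    TypeSize Bits = VT.getSizeInBits();            // 128 (fixed)
    EVT Half = VT.getHalfNumVectorElementsVT(Ctx); // v2i32
    (void)Vec; (void)NumElts; (void)EltBits; (void)Bits; (void)Half;
  }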
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
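A hedged sketch of the usual pattern in lowering/combine code: ask SelectionDAG for the known bits of an operand and fold only when every bit is known. SelectionDAG::computeKnownBits is assumed from the surrounding DAG context rather than from this list:
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  // Returns true and fills C when every bit of Op is known.
  static bool getKnownConstant(SelectionDAG &DAG, SDValue Op, APInt &C) {
    KnownBits Known = DAG.computeKnownBits(Op);
    if (!Known.isConstant())
      return false;
    C = Known.getConstant();
    return true;
  }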
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
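A sketch of how these factory functions usually appear in call lowering, assuming DAG, dl, Chain, the address PtrPlus8 and the frame index FI come from the surrounding code:
  // Load an argument back from its fixed stack slot; getWithOffset keeps the
  // alias information accurate when the access is at the slot address + 8.
  SDValue Load = DAG.getLoad(
      MVT::i64, dl, Chain, PtrPlus8,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)
          .getWithOffset(8));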
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
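These setters return the CallLoweringInfo itself, so they chain. A hedged sketch of the common libcall-lowering shape, assuming it sits inside a TargetLowering member (so LowerCallTo resolves) and that dl, Chain, RetTy, Callee and Args exist in the surrounding function:
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(Chain)
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setSExtResult(true)
      .setTailCall(false);
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  // CallResult.first is the returned value, CallResult.second the new chain.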
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.