// Rendered Doxygen listing of llvm/lib/Target/PowerPC/PPCISelLowering.cpp
// (LLVM 23.0.0git). Navigation chrome from the generated documentation page.
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSelectionDAGInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
84#include "llvm/Support/Debug.h"
86#include "llvm/Support/Format.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(false));
109
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(true), cl::Hidden);
132
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(true), cl::Hidden);
137
139 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
143 "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
144 cl::desc("Set minimum of largest number of comparisons to use bit test for "
145 "switch on PPC."));
146
148 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
149 cl::desc("max depth when checking alias info in GatherAllAliases()"));
150
152 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
153 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
154 "function to use initial-exec"));
155
156STATISTIC(NumTailCalls, "Number of tail calls");
157STATISTIC(NumSiblingCalls, "Number of sibling calls");
158STATISTIC(ShufflesHandledWithVPERM,
159 "Number of shuffles lowered to a VPERM or XXPERM");
160STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
161
162static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
163
164static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
165
166// A faster local-[exec|dynamic] TLS access sequence (enabled with the
167// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
168// variables; consistent with the IBM XL compiler, we apply a max size of
169// slightly under 32KB.
171
172// FIXME: Remove this once the bug has been fixed!
174
176 const PPCSubtarget &STI)
177 : TargetLowering(TM, STI), Subtarget(STI) {
178 // Initialize map that relates the PPC addressing modes to the computed flags
179 // of a load/store instruction. The map is used to determine the optimal
180 // addressing mode when selecting load and stores.
181 initializeAddrModeMap();
182 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
183 // arguments are at least 4/8 bytes aligned.
184 bool isPPC64 = Subtarget.isPPC64();
185 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
186 const MVT RegVT = Subtarget.getScalarIntVT();
187
188 // Set up the register classes.
189 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
190 if (!useSoftFloat()) {
191 if (hasSPE()) {
192 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
193 // EFPU2 APU only supports f32
194 if (!Subtarget.hasEFPU2())
195 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
196 } else {
197 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
198 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
199 }
200 }
201
204
205 // PowerPC uses addo_carry,subo_carry to propagate carry.
208
209 // On P10, the default lowering generates better code using the
210 // setbc instruction.
211 if (!Subtarget.hasP10Vector()) {
214 if (isPPC64) {
217 }
218 }
219
220 // Match BITREVERSE to customized fast code sequence in the td file.
223
224 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
226
227 // Custom lower inline assembly to check for special registers.
230
231 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
232 for (MVT VT : MVT::integer_valuetypes()) {
235 }
236
237 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
239
240 if (Subtarget.isISA3_0()) {
241 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
242 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
243 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
244 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
245 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
246 } else {
247 // No extending loads from f16 or HW conversions back and forth.
248 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
250 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
253 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
256 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
257 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
258 }
259
260 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
261
262 // PowerPC has pre-inc load and store's.
273 if (!Subtarget.hasSPE()) {
278 }
279
280 if (Subtarget.useCRBits()) {
282
283 if (isPPC64 || Subtarget.hasFPCVT()) {
288
290 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
292 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
293
298
300 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
302 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
303 } else {
308 }
309
310 // PowerPC does not support direct load/store of condition registers.
313
314 // FIXME: Remove this once the ANDI glue bug is fixed:
315 if (ANDIGlueBug)
317
318 for (MVT VT : MVT::integer_valuetypes()) {
321 setTruncStoreAction(VT, MVT::i1, Expand);
322 }
323
324 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
325 }
326
327 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
328 // PPC (the libcall is not available).
333
334 // We do not currently implement these libm ops for PowerPC.
335 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
336 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
337 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
338 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
340 setOperationAction(ISD::FREM, MVT::ppcf128, LibCall);
341
342 // PowerPC has no SREM/UREM instructions unless we are on P9
343 // On P9 we may use a hardware instruction to compute the remainder.
344 // When the result of both the remainder and the division is required it is
345 // more efficient to compute the remainder from the result of the division
346 // rather than use the remainder instruction. The instructions are legalized
347 // directly because the DivRemPairsPass performs the transformation at the IR
348 // level.
349 if (Subtarget.isISA3_0()) {
354 } else {
359 }
360
361 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
370
371 // Handle constrained floating-point operations of scalar.
372 // TODO: Handle SPE specific operation.
378
383
384 if (!Subtarget.hasSPE()) {
387 }
388
389 if (Subtarget.hasVSX()) {
392 }
393
394 if (Subtarget.hasFSQRT()) {
397 }
398
399 if (Subtarget.hasFPRND()) {
404
409 }
410
411 // We don't support sin/cos/sqrt/fmod/pow
422
423 // MASS transformation for LLVM intrinsics with replicating fast-math flag
424 // to be consistent to PPCGenScalarMASSEntries pass
425 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
438 }
439
440 if (Subtarget.hasSPE()) {
443 } else {
444 setOperationAction(ISD::FMA , MVT::f64, Legal);
445 setOperationAction(ISD::FMA , MVT::f32, Legal);
448 }
449
450 if (Subtarget.hasSPE())
451 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
452
453 // If we're enabling GP optimizations, use hardware square root
454 if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
456
457 if (!Subtarget.hasFSQRT() &&
458 !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
460
461 if (Subtarget.hasFCPSGN()) {
464 } else {
467 }
468
469 if (Subtarget.hasFPRND()) {
474
479 }
480
481 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
482 // instruction xxbrd to speed up scalar BSWAP64.
483 if (Subtarget.isISA3_1()) {
486 } else {
489 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
490 }
491
492 // CTPOP or CTTZ were introduced in P8/P9 respectively
493 if (Subtarget.isISA3_0()) {
494 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
495 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
496 } else {
497 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
498 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
499 }
500
501 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
504 } else {
507 }
508
509 // PowerPC does not have ROTR
512
513 if (!Subtarget.useCRBits()) {
514 // PowerPC does not have Select
519 }
520
521 // PowerPC wants to turn select_cc of FP into fsel when possible.
524
525 // PowerPC wants to optimize integer setcc a bit
526 if (!Subtarget.useCRBits())
528
529 if (Subtarget.hasFPU()) {
533
537 }
538
539 // PowerPC does not have BRCOND which requires SetCC
540 if (!Subtarget.useCRBits())
542
544
545 if (Subtarget.hasSPE()) {
546 // SPE has built-in conversions
553
554 // SPE supports signaling compare of f32/f64.
557 } else {
558 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
561
562 // PowerPC does not have [U|S]INT_TO_FP
567 }
568
569 if (Subtarget.hasDirectMove() && isPPC64) {
574
583 } else {
588 }
589
590 // We cannot sextinreg(i1). Expand to shifts.
592
593 // Custom handling for PowerPC ucmp instruction
595 setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);
596
597 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
598 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
599 // support continuation, user-level threading, and etc.. As a result, no
600 // other SjLj exception interfaces are implemented and please don't build
601 // your own exception handling based on them.
602 // LLVM/Clang supports zero-cost DWARF exception handling.
605
606 // We want to legalize GlobalAddress and ConstantPool nodes into the
607 // appropriate instructions to materialize the address.
618
619 // TRAP is legal.
620 setOperationAction(ISD::TRAP, MVT::Other, Legal);
621
622 // TRAMPOLINE is custom lowered.
625
626 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
628
629 if (Subtarget.is64BitELFABI()) {
630 // VAARG always uses double-word chunks, so promote anything smaller.
632 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
634 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
636 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
638 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
640 } else if (Subtarget.is32BitELFABI()) {
641 // VAARG is custom lowered with the 32-bit SVR4 ABI.
644 } else
646
647 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
648 if (Subtarget.is32BitELFABI())
650 else
652
653 // Use the default implementation.
654 setOperationAction(ISD::VAEND , MVT::Other, Expand);
663
664 if (Subtarget.isISA3_0() && isPPC64) {
665 setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
666 setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
667 setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
668 setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
669 setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
670 setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
671 setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
672 setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
673 }
674
675 // We want to custom lower some of our intrinsics.
681
682 // To handle counter-based loop conditions.
685
690
691 // Comparisons that require checking two conditions.
692 if (Subtarget.hasSPE()) {
697 }
710
713
714 if (Subtarget.has64BitSupport()) {
715 // They also have instructions for converting between i64 and fp.
724 // This is just the low 32 bits of a (signed) fp->i64 conversion.
725 // We cannot do this with Promote because i64 is not a legal type.
728
729 if (Subtarget.hasLFIWAX() || isPPC64) {
732 }
733 } else {
734 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
735 if (Subtarget.hasSPE()) {
738 } else {
741 }
742 }
743
744 // With the instructions enabled under FPCVT, we can do everything.
745 if (Subtarget.hasFPCVT()) {
746 if (Subtarget.has64BitSupport()) {
755 }
756
765 }
766
767 if (Subtarget.use64BitRegs()) {
768 // 64-bit PowerPC implementations can support i64 types directly
769 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
770 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
772 // 64-bit PowerPC wants to expand i128 shifts itself.
776 } else {
777 // 32-bit PowerPC wants to expand i64 shifts itself.
781 }
782
783 // PowerPC has better expansions for funnel shifts than the generic
784 // TargetLowering::expandFunnelShift.
785 if (Subtarget.has64BitSupport()) {
788 }
791
792 if (Subtarget.hasVSX()) {
803 }
804
805 if (Subtarget.hasAltivec()) {
806 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
813 }
814 // First set operation action for all vector types to expand. Then we
815 // will selectively turn on ones that can be effectively codegen'd.
817 // add/sub are legal for all supported vector VT's.
820
821 // For v2i64, these are only valid with P8Vector. This is corrected after
822 // the loop.
823 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
828 }
829 else {
834 }
835
836 if (Subtarget.hasVSX()) {
842 }
843
844 // Vector instructions introduced in P8
845 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
848 }
849 else {
852 }
853
854 // Vector instructions introduced in P9
855 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
857 else
859
860 // We promote all shuffles to v16i8.
862 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
863
864 // We promote all non-typed operations to v4i32.
866 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
868 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
870 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
872 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
874 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
877 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
879 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
880
881 // No other operations are legal.
920
921 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
922 setTruncStoreAction(VT, InnerVT, Expand);
925 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
926 }
927 }
929 if (!Subtarget.hasP8Vector()) {
930 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
931 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
932 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
933 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
934 }
935
936 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
937 // with merges, splats, etc.
939
940 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
941 // are cheap, so handle them before they get expanded to scalar.
947
948 setOperationAction(ISD::AND , MVT::v4i32, Legal);
949 setOperationAction(ISD::OR , MVT::v4i32, Legal);
950 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
951 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
953 Subtarget.useCRBits() ? Legal : Expand);
954 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
964 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
967
968 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
969 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
970 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
971 if (Subtarget.hasAltivec())
972 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
974 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
975 if (Subtarget.hasP8Altivec())
976 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
977
978 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
979 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
980 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
981 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
982
983 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
984 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
985
986 if (Subtarget.hasVSX()) {
987 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
988 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
990 }
991
992 if (Subtarget.hasP8Altivec())
993 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
994 else
995 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
996
997 if (Subtarget.isISA3_1()) {
998 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
999 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
1000 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
1001 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
1002 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
1003 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
1004 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
1005 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
1006 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
1007 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
1008 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
1009 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
1010 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
1011 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
1012 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
1013 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
1014 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
1015 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
1016 }
1017
1018 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1019 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1020
1023 // LE is P8+/64-bit so direct moves are supported and these operations
1024 // are legal. The custom transformation requires 64-bit since we need a
1025 // pair of stores that will cover a 128-bit load for P10.
1026 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1030 }
1031
1036
1037 // Altivec does not contain unordered floating-point compare instructions
1038 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1039 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1040 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1041 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1042
1043 if (Subtarget.hasVSX()) {
1046 if (Subtarget.hasP8Vector()) {
1049 }
1050 if (Subtarget.hasDirectMove() && isPPC64) {
1059 }
1061
1062 // The nearbyint variants are not allowed to raise the inexact exception
1063 // so we can only code-gen them with fpexcept.ignore.
1068
1069 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1070 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1071 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1072 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1073 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1076
1077 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1078 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1081
1082 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1083 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1084
1085 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1086 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1087
1088 // Share the Altivec comparison restrictions.
1089 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1090 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1091 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1092 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1093
1094 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1095 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1096
1098
1099 if (Subtarget.hasP8Vector())
1100 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1101
1102 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1103
1104 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1105 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1106 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1107
1108 if (Subtarget.hasP8Altivec()) {
1109 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1110 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1111 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1112
1113 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1114 // SRL, but not for SRA because of the instructions available:
1115 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1116 // doing
1117 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1118 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1119 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1120
1121 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1122 }
1123 else {
1124 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1125 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1126 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1127
1128 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1129
1130 // VSX v2i64 only supports non-arithmetic operations.
1131 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1132 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1133 }
1134
1135 if (Subtarget.isISA3_1())
1136 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1137 else
1138 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1139
1140 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1141 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1143 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1144
1146
1155
1156 // Custom handling for partial vectors of integers converted to
1157 // floating point. We already have optimal handling for v2i32 through
1158 // the DAG combine, so those aren't necessary.
1175
1176 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1177 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1178 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1179 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1182
1185
1186 // Handle constrained floating-point operations of vector.
1187 // The predictor is `hasVSX` because altivec instruction has
1188 // no exception but VSX vector instruction has.
1202
1216
1217 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1218 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1219
1220 for (MVT FPT : MVT::fp_valuetypes())
1221 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1222
1223 // Expand the SELECT to SELECT_CC
1225
1226 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1227 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1228
1229 // No implementation for these ops for PowerPC.
1231 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1232 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1233 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1236 }
1237
1238 if (Subtarget.hasP8Altivec()) {
1239 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1240 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1241 }
1242
1243 if (Subtarget.hasP9Vector()) {
1246
1247 // Test data class instructions store results in CR bits.
1248 if (Subtarget.useCRBits()) {
1253 }
1254
1255 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1256 // SRL, but not for SRA because of the instructions available:
1257 // VS{RL} and VS{RL}O.
1258 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1259 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1260 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1261
1262 setOperationAction(ISD::FADD, MVT::f128, Legal);
1263 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1264 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1265 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1267
1268 setOperationAction(ISD::FMA, MVT::f128, Legal);
1275
1277 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1279 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1282
1286
1287 // Handle constrained floating-point operations of fp128
1304 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1305 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1306 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1307 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1308 } else if (Subtarget.hasVSX()) {
1311
1312 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1313 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1314
1315 // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1316 // fp_to_uint and int_to_fp.
1319
1320 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1321 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1322 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1323 setOperationAction(ISD::FABS, MVT::f128, Expand);
1325 setOperationAction(ISD::FMA, MVT::f128, Expand);
1327
1328 // Expand the fp_extend if the target type is fp128.
1331
1332 // Expand the fp_round if the source type is fp128.
1333 for (MVT VT : {MVT::f32, MVT::f64}) {
1336 }
1337
1342
1343 // Lower following f128 select_cc pattern:
1344 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1346
1347 // We need to handle f128 SELECT_CC with integer result type.
1349 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1350 }
1351
1352 if (Subtarget.hasP9Altivec()) {
1353 if (Subtarget.isISA3_1()) {
1358 } else {
1361 }
1369
1370 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1371 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1372 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1373 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1374 }
1375
1376 if (Subtarget.hasP10Vector()) {
1378 }
1379 }
1380
1381 if (Subtarget.pairedVectorMemops()) {
1382 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1383 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1384 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1385 }
1386 if (Subtarget.hasMMA()) {
1387 if (Subtarget.isISAFuture()) {
1388 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1389 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1390 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1391 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1392 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1393 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1394 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1395 } else {
1396 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1397 }
1398 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1399 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1401 }
1402
1403 if (Subtarget.has64BitSupport())
1405
1406 if (Subtarget.isISA3_1())
1407 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1408
1409 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1410
1411 if (!isPPC64) {
1414 }
1415
1420 }
1421
1423
1424 if (Subtarget.hasAltivec()) {
1425 // Altivec instructions set fields to all zeros or all ones.
1427 }
1428
1431 else if (isPPC64)
1433 else
1435
1436 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1437
1438 // We have target-specific dag combine patterns for the following nodes:
1442 if (Subtarget.hasFPCVT())
1445 if (Subtarget.useCRBits())
1449
1451
1453
1454 if (Subtarget.useCRBits()) {
1456 }
1457
1458 if (Subtarget.hasP8Vector())
1460
1461 // With 32 condition bits, we don't need to sink (and duplicate) compares
1462 // aggressively in CodeGenPrep.
1463 if (Subtarget.useCRBits()) {
1465 }
1466
1467 // TODO: The default entry number is set to 64. This stops most jump table
1468 // generation on PPC. But it is good for current PPC HWs because the indirect
1469 // branch instruction mtctr to the jump table may lead to bad branch predict.
1470 // Re-evaluate this value on future HWs that can do better with mtctr.
1472
1473 // The default minimum of largest number in a BitTest cluster is 3.
1475
1477 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1478
1479 auto CPUDirective = Subtarget.getCPUDirective();
1480 switch (CPUDirective) {
1481 default: break;
1482 case PPC::DIR_970:
1483 case PPC::DIR_A2:
1484 case PPC::DIR_E500:
1485 case PPC::DIR_E500mc:
1486 case PPC::DIR_E5500:
1487 case PPC::DIR_PWR4:
1488 case PPC::DIR_PWR5:
1489 case PPC::DIR_PWR5X:
1490 case PPC::DIR_PWR6:
1491 case PPC::DIR_PWR6X:
1492 case PPC::DIR_PWR7:
1493 case PPC::DIR_PWR8:
1494 case PPC::DIR_PWR9:
1495 case PPC::DIR_PWR10:
1496 case PPC::DIR_PWR11:
1500 break;
1501 }
1502
1503 if (Subtarget.enableMachineScheduler())
1505 else
1507
1509
1510 // The Freescale cores do better with aggressive inlining of memcpy and
1511 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1512 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1513 MaxStoresPerMemset = 32;
1515 MaxStoresPerMemcpy = 32;
1519 } else if (CPUDirective == PPC::DIR_A2) {
1520 // The A2 also benefits from (very) aggressive inlining of memcpy and
1521 // friends. The overhead of a the function call, even when warm, can be
1522 // over one hundred cycles.
1523 MaxStoresPerMemset = 128;
1524 MaxStoresPerMemcpy = 128;
1525 MaxStoresPerMemmove = 128;
1526 MaxLoadsPerMemcmp = 128;
1527 } else {
1530 }
1531
1532 // Enable generation of STXVP instructions by default for mcpu=future.
1533 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1534 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1535 DisableAutoPairedVecSt = false;
1536
1537 IsStrictFPEnabled = true;
1538
1539 // Let the subtarget (CPU) decide if a predictable select is more expensive
1540 // than the corresponding branch. This information is used in CGP to decide
1541 // when to convert selects into branches.
1542 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1543
1545}
1546
1547// *********************************** NOTE ************************************
1548// For selecting load and store instructions, the addressing modes are defined
1549// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1550// patterns to match the load the store instructions.
1551//
1552// The TD definitions for the addressing modes correspond to their respective
1553// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1554// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1555// address mode flags of a particular node. Afterwards, the computed address
1556// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1557// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1558// accordingly, based on the preferred addressing mode.
1559//
1560// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1561// MemOpFlags contains all the possible flags that can be used to compute the
1562// optimal addressing mode for load and store instructions.
1563// AddrMode contains all the possible load and store addressing modes available
1564// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1565//
1566// When adding new load and store instructions, it is possible that new address
1567// flags may need to be added into MemOpFlags, and a new addressing mode will
1568// need to be added to AddrMode. An entry of the new addressing mode (consisting
1569// of the minimal and main distinguishing address flags for the new load/store
1570// instructions) will need to be added into initializeAddrModeMap() below.
1571// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1572// need to be updated to account for selecting the optimal addressing mode.
1573// *****************************************************************************
1574/// Initialize the map that relates the different addressing modes of the load
1575/// and store instructions to a set of flags. This ensures the load/store
1576/// instruction is correctly matched during instruction selection.
1577void PPCTargetLowering::initializeAddrModeMap() {
1578 AddrModesMap[PPC::AM_DForm] = {
1579 // LWZ, STW
1584 // LBZ, LHZ, STB, STH
1589 // LHA
1594 // LFS, LFD, STFS, STFD
1599 };
1600 AddrModesMap[PPC::AM_DSForm] = {
1601 // LWA
1605 // LD, STD
1609 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1613 };
1614 AddrModesMap[PPC::AM_DQForm] = {
1615 // LXV, STXV
1619 };
1620 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1622 // TODO: Add mapping for quadword load/store.
1623}
1624
1625/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1626/// the desired ByVal argument alignment.
1627static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1628 if (MaxAlign == MaxMaxAlign)
1629 return;
1630 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1631 if (MaxMaxAlign >= 32 &&
1632 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1633 MaxAlign = Align(32);
1634 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1635 MaxAlign < 16)
1636 MaxAlign = Align(16);
1637 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1638 Align EltAlign;
1639 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1640 if (EltAlign > MaxAlign)
1641 MaxAlign = EltAlign;
1642 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1643 for (auto *EltTy : STy->elements()) {
1644 Align EltAlign;
1645 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1646 if (EltAlign > MaxAlign)
1647 MaxAlign = EltAlign;
1648 if (MaxAlign == MaxMaxAlign)
1649 break;
1650 }
1651 }
1652}
1653
1654/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1655/// function arguments in the caller parameter area.
1657 const DataLayout &DL) const {
1658 // 16byte and wider vectors are passed on 16byte boundary.
1659 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1660 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1661 if (Subtarget.hasAltivec())
1662 getMaxByValAlign(Ty, Alignment, Align(16));
1663 return Alignment;
1664}
1665
1667 return Subtarget.useSoftFloat();
1668}
1669
1671 return Subtarget.hasSPE();
1672}
1673
1675 return VT.isScalarInteger();
1676}
1677
1679 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1680 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1681 return false;
1682
1683 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1684 if (VTy->getScalarType()->isIntegerTy()) {
1685 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1686 if (ElemSizeInBits == 32) {
1687 Index = Subtarget.isLittleEndian() ? 2 : 1;
1688 return true;
1689 }
1690 if (ElemSizeInBits == 64) {
1691 Index = Subtarget.isLittleEndian() ? 1 : 0;
1692 return true;
1693 }
1694 }
1695 }
1696 return false;
1697}
1698
1700 EVT VT) const {
1701 if (!VT.isVector())
1702 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1703
1705}
1706
1708 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1709 return true;
1710}
1711
1712//===----------------------------------------------------------------------===//
1713// Node matching predicates, for use by the tblgen matching code.
1714//===----------------------------------------------------------------------===//
1715
1716/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1719 return CFP->getValueAPF().isZero();
1720 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1721 // Maybe this has already been legalized into the constant pool?
1722 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1723 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1724 return CFP->getValueAPF().isZero();
1725 }
1726 return false;
1727}
1728
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  // Undef mask elements are encoded as negative values and match anything.
  if (Op < 0)
    return true;
  return Op == Val;
}
1734
1735/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1736/// VPKUHUM instruction.
1737/// The ShuffleKind distinguishes between big-endian operations with
1738/// two different inputs (0), either-endian operations with two identical
1739/// inputs (1), and little-endian operations with two different inputs (2).
1740/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1742 SelectionDAG &DAG) {
1743 bool IsLE = DAG.getDataLayout().isLittleEndian();
1744 if (ShuffleKind == 0) {
1745 if (IsLE)
1746 return false;
1747 for (unsigned i = 0; i != 16; ++i)
1748 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1749 return false;
1750 } else if (ShuffleKind == 2) {
1751 if (!IsLE)
1752 return false;
1753 for (unsigned i = 0; i != 16; ++i)
1754 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1755 return false;
1756 } else if (ShuffleKind == 1) {
1757 unsigned j = IsLE ? 0 : 1;
1758 for (unsigned i = 0; i != 8; ++i)
1759 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1760 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1761 return false;
1762 }
1763 return true;
1764}
1765
1766/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1767/// VPKUWUM instruction.
1768/// The ShuffleKind distinguishes between big-endian operations with
1769/// two different inputs (0), either-endian operations with two identical
1770/// inputs (1), and little-endian operations with two different inputs (2).
1771/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1773 SelectionDAG &DAG) {
1774 bool IsLE = DAG.getDataLayout().isLittleEndian();
1775 if (ShuffleKind == 0) {
1776 if (IsLE)
1777 return false;
1778 for (unsigned i = 0; i != 16; i += 2)
1779 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1780 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1781 return false;
1782 } else if (ShuffleKind == 2) {
1783 if (!IsLE)
1784 return false;
1785 for (unsigned i = 0; i != 16; i += 2)
1786 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1787 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1788 return false;
1789 } else if (ShuffleKind == 1) {
1790 unsigned j = IsLE ? 0 : 2;
1791 for (unsigned i = 0; i != 8; i += 2)
1792 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1793 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1794 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1795 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1796 return false;
1797 }
1798 return true;
1799}
1800
1801/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1802/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1803/// current subtarget.
1804///
1805/// The ShuffleKind distinguishes between big-endian operations with
1806/// two different inputs (0), either-endian operations with two identical
1807/// inputs (1), and little-endian operations with two different inputs (2).
1808/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1810 SelectionDAG &DAG) {
1811 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1812 if (!Subtarget.hasP8Vector())
1813 return false;
1814
1815 bool IsLE = DAG.getDataLayout().isLittleEndian();
1816 if (ShuffleKind == 0) {
1817 if (IsLE)
1818 return false;
1819 for (unsigned i = 0; i != 16; i += 4)
1820 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1821 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1822 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1823 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1824 return false;
1825 } else if (ShuffleKind == 2) {
1826 if (!IsLE)
1827 return false;
1828 for (unsigned i = 0; i != 16; i += 4)
1829 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1830 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1831 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1832 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1833 return false;
1834 } else if (ShuffleKind == 1) {
1835 unsigned j = IsLE ? 0 : 4;
1836 for (unsigned i = 0; i != 8; i += 4)
1837 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1838 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1839 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1840 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1841 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1842 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1843 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1844 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1845 return false;
1846 }
1847 return true;
1848}
1849
1850/// isVMerge - Common function, used to match vmrg* shuffles.
1851///
1852static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1853 unsigned LHSStart, unsigned RHSStart) {
1854 if (N->getValueType(0) != MVT::v16i8)
1855 return false;
1856 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1857 "Unsupported merge size!");
1858
1859 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1860 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1861 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1862 LHSStart+j+i*UnitSize) ||
1863 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1864 RHSStart+j+i*UnitSize))
1865 return false;
1866 }
1867 return true;
1868}
1869
1870/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1871/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1872/// The ShuffleKind distinguishes between big-endian merges with two
1873/// different inputs (0), either-endian merges with two identical inputs (1),
1874/// and little-endian merges with two different inputs (2). For the latter,
1875/// the input operands are swapped (see PPCInstrAltivec.td).
1877 unsigned ShuffleKind, SelectionDAG &DAG) {
1878 if (DAG.getDataLayout().isLittleEndian()) {
1879 if (ShuffleKind == 1) // unary
1880 return isVMerge(N, UnitSize, 0, 0);
1881 else if (ShuffleKind == 2) // swapped
1882 return isVMerge(N, UnitSize, 0, 16);
1883 else
1884 return false;
1885 } else {
1886 if (ShuffleKind == 1) // unary
1887 return isVMerge(N, UnitSize, 8, 8);
1888 else if (ShuffleKind == 0) // normal
1889 return isVMerge(N, UnitSize, 8, 24);
1890 else
1891 return false;
1892 }
1893}
1894
1895/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1896/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1897/// The ShuffleKind distinguishes between big-endian merges with two
1898/// different inputs (0), either-endian merges with two identical inputs (1),
1899/// and little-endian merges with two different inputs (2). For the latter,
1900/// the input operands are swapped (see PPCInstrAltivec.td).
1902 unsigned ShuffleKind, SelectionDAG &DAG) {
1903 if (DAG.getDataLayout().isLittleEndian()) {
1904 if (ShuffleKind == 1) // unary
1905 return isVMerge(N, UnitSize, 8, 8);
1906 else if (ShuffleKind == 2) // swapped
1907 return isVMerge(N, UnitSize, 8, 24);
1908 else
1909 return false;
1910 } else {
1911 if (ShuffleKind == 1) // unary
1912 return isVMerge(N, UnitSize, 0, 0);
1913 else if (ShuffleKind == 0) // normal
1914 return isVMerge(N, UnitSize, 0, 16);
1915 else
1916 return false;
1917 }
1918}
1919
1920/**
1921 * Common function used to match vmrgew and vmrgow shuffles
1922 *
1923 * The indexOffset determines whether to look for even or odd words in
 1924 * the shuffle mask. This is based on the endianness of the target
1925 * machine.
1926 * - Little Endian:
1927 * - Use offset of 0 to check for odd elements
1928 * - Use offset of 4 to check for even elements
1929 * - Big Endian:
1930 * - Use offset of 0 to check for even elements
1931 * - Use offset of 4 to check for odd elements
1932 * A detailed description of the vector element ordering for little endian and
1933 * big endian can be found at
1934 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1935 * Targeting your applications - what little endian and big endian IBM XL C/C++
1936 * compiler differences mean to you
1937 *
1938 * The mask to the shuffle vector instruction specifies the indices of the
1939 * elements from the two input vectors to place in the result. The elements are
1940 * numbered in array-access order, starting with the first vector. These vectors
1941 * are always of type v16i8, thus each vector will contain 16 elements of size
1942 * 8. More info on the shuffle vector can be found in the
1943 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1944 * Language Reference.
1945 *
1946 * The RHSStartValue indicates whether the same input vectors are used (unary)
1947 * or two different input vectors are used, based on the following:
1948 * - If the instruction uses the same vector for both inputs, the range of the
1949 * indices will be 0 to 15. In this case, the RHSStart value passed should
1950 * be 0.
1951 * - If the instruction has two different vectors then the range of the
1952 * indices will be 0 to 31. In this case, the RHSStart value passed should
1953 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1954 * to 31 specify elements in the second vector).
1955 *
1956 * \param[in] N The shuffle vector SD Node to analyze
1957 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1958 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1959 * vector to the shuffle_vector instruction
1960 * \return true iff this shuffle vector represents an even or odd word merge
1961 */
1962static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1963 unsigned RHSStartValue) {
1964 if (N->getValueType(0) != MVT::v16i8)
1965 return false;
1966
1967 for (unsigned i = 0; i < 2; ++i)
1968 for (unsigned j = 0; j < 4; ++j)
1969 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1970 i*RHSStartValue+j+IndexOffset) ||
1971 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1972 i*RHSStartValue+j+IndexOffset+8))
1973 return false;
1974 return true;
1975}
1976
1977/**
1978 * Determine if the specified shuffle mask is suitable for the vmrgew or
1979 * vmrgow instructions.
1980 *
1981 * \param[in] N The shuffle vector SD Node to analyze
1982 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1983 * \param[in] ShuffleKind Identify the type of merge:
1984 * - 0 = big-endian merge with two different inputs;
1985 * - 1 = either-endian merge with two identical inputs;
1986 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1987 * little-endian merges).
1988 * \param[in] DAG The current SelectionDAG
 1989 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow
1990 */
1992 unsigned ShuffleKind, SelectionDAG &DAG) {
1993 if (DAG.getDataLayout().isLittleEndian()) {
1994 unsigned indexOffset = CheckEven ? 4 : 0;
1995 if (ShuffleKind == 1) // Unary
1996 return isVMerge(N, indexOffset, 0);
1997 else if (ShuffleKind == 2) // swapped
1998 return isVMerge(N, indexOffset, 16);
1999 else
2000 return false;
2001 }
2002 else {
2003 unsigned indexOffset = CheckEven ? 0 : 4;
2004 if (ShuffleKind == 1) // Unary
2005 return isVMerge(N, indexOffset, 0);
2006 else if (ShuffleKind == 0) // Normal
2007 return isVMerge(N, indexOffset, 16);
2008 else
2009 return false;
2010 }
2011 return false;
2012}
2013
2014/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2015/// amount, otherwise return -1.
2016/// The ShuffleKind distinguishes between big-endian operations with two
2017/// different inputs (0), either-endian operations with two identical inputs
2018/// (1), and little-endian operations with two different inputs (2). For the
2019/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2020int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2021 SelectionDAG &DAG) {
2022 if (N->getValueType(0) != MVT::v16i8)
2023 return -1;
2024
2026
2027 // Find the first non-undef value in the shuffle mask.
2028 unsigned i;
2029 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2030 /*search*/;
2031
2032 if (i == 16) return -1; // all undef.
2033
2034 // Otherwise, check to see if the rest of the elements are consecutively
2035 // numbered from this value.
2036 unsigned ShiftAmt = SVOp->getMaskElt(i);
2037 if (ShiftAmt < i) return -1;
2038
2039 ShiftAmt -= i;
2040 bool isLE = DAG.getDataLayout().isLittleEndian();
2041
2042 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2043 // Check the rest of the elements to see if they are consecutive.
2044 for (++i; i != 16; ++i)
2045 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2046 return -1;
2047 } else if (ShuffleKind == 1) {
2048 // Check the rest of the elements to see if they are consecutive.
2049 for (++i; i != 16; ++i)
2050 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2051 return -1;
2052 } else
2053 return -1;
2054
2055 if (isLE)
2056 ShiftAmt = 16 - ShiftAmt;
2057
2058 return ShiftAmt;
2059}
2060
2061/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2062/// specifies a splat of a single element that is suitable for input to
2063/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2065 EVT VT = N->getValueType(0);
2066 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2067 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2068
2069 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2070 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2071
2072 // The consecutive indices need to specify an element, not part of two
2073 // different elements. So abandon ship early if this isn't the case.
2074 if (N->getMaskElt(0) % EltSize != 0)
2075 return false;
2076
2077 // This is a splat operation if each element of the permute is the same, and
2078 // if the value doesn't reference the second vector.
2079 unsigned ElementBase = N->getMaskElt(0);
2080
2081 // FIXME: Handle UNDEF elements too!
2082 if (ElementBase >= 16)
2083 return false;
2084
2085 // Check that the indices are consecutive, in the case of a multi-byte element
2086 // splatted with a v16i8 mask.
2087 for (unsigned i = 1; i != EltSize; ++i)
2088 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2089 return false;
2090
2091 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2092 // An UNDEF element is a sequence of UNDEF bytes.
2093 if (N->getMaskElt(i) < 0) {
2094 for (unsigned j = 1; j != EltSize; ++j)
2095 if (N->getMaskElt(i + j) >= 0)
2096 return false;
2097 } else
2098 for (unsigned j = 0; j != EltSize; ++j)
2099 if (N->getMaskElt(i + j) != N->getMaskElt(j))
2100 return false;
2101 }
2102 return true;
2103}
2104
2105/// Check that the mask is shuffling N byte elements. Within each N byte
2106/// element of the mask, the indices could be either in increasing or
2107/// decreasing order as long as they are consecutive.
2108/// \param[in] N the shuffle vector SD Node to analyze
2109/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2110/// Word/DoubleWord/QuadWord).
2111/// \param[in] StepLen the delta indices number among the N byte element, if
2112/// the mask is in increasing/decreasing order then it is 1/-1.
2113/// \return true iff the mask is shuffling N byte elements.
2114static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2115 int StepLen) {
2116 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2117 "Unexpected element width.");
2118 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2119
2120 unsigned NumOfElem = 16 / Width;
2121 unsigned MaskVal[16]; // Width is never greater than 16
2122 for (unsigned i = 0; i < NumOfElem; ++i) {
2123 MaskVal[0] = N->getMaskElt(i * Width);
2124 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2125 return false;
2126 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2127 return false;
2128 }
2129
2130 for (unsigned int j = 1; j < Width; ++j) {
2131 MaskVal[j] = N->getMaskElt(i * Width + j);
2132 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2133 return false;
2134 }
2135 }
2136 }
2137
2138 return true;
2139}
2140
2141bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2142 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2143 if (!isNByteElemShuffleMask(N, 4, 1))
2144 return false;
2145
2146 // Now we look at mask elements 0,4,8,12
2147 unsigned M0 = N->getMaskElt(0) / 4;
2148 unsigned M1 = N->getMaskElt(4) / 4;
2149 unsigned M2 = N->getMaskElt(8) / 4;
2150 unsigned M3 = N->getMaskElt(12) / 4;
2151 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2152 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2153
2154 // Below, let H and L be arbitrary elements of the shuffle mask
2155 // where H is in the range [4,7] and L is in the range [0,3].
2156 // H, 1, 2, 3 or L, 5, 6, 7
2157 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2158 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2159 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2160 InsertAtByte = IsLE ? 12 : 0;
2161 Swap = M0 < 4;
2162 return true;
2163 }
2164 // 0, H, 2, 3 or 4, L, 6, 7
2165 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2166 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2167 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2168 InsertAtByte = IsLE ? 8 : 4;
2169 Swap = M1 < 4;
2170 return true;
2171 }
2172 // 0, 1, H, 3 or 4, 5, L, 7
2173 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2174 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2175 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2176 InsertAtByte = IsLE ? 4 : 8;
2177 Swap = M2 < 4;
2178 return true;
2179 }
2180 // 0, 1, 2, H or 4, 5, 6, L
2181 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2182 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2183 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2184 InsertAtByte = IsLE ? 0 : 12;
2185 Swap = M3 < 4;
2186 return true;
2187 }
2188
2189 // If both vector operands for the shuffle are the same vector, the mask will
2190 // contain only elements from the first one and the second one will be undef.
2191 if (N->getOperand(1).isUndef()) {
2192 ShiftElts = 0;
2193 Swap = true;
2194 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2195 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2196 InsertAtByte = IsLE ? 12 : 0;
2197 return true;
2198 }
2199 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2200 InsertAtByte = IsLE ? 8 : 4;
2201 return true;
2202 }
2203 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2204 InsertAtByte = IsLE ? 4 : 8;
2205 return true;
2206 }
2207 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2208 InsertAtByte = IsLE ? 0 : 12;
2209 return true;
2210 }
2211 }
2212
2213 return false;
2214}
2215
2217 bool &Swap, bool IsLE) {
2218 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2219 // Ensure each byte index of the word is consecutive.
2220 if (!isNByteElemShuffleMask(N, 4, 1))
2221 return false;
2222
2223 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2224 unsigned M0 = N->getMaskElt(0) / 4;
2225 unsigned M1 = N->getMaskElt(4) / 4;
2226 unsigned M2 = N->getMaskElt(8) / 4;
2227 unsigned M3 = N->getMaskElt(12) / 4;
2228
2229 // If both vector operands for the shuffle are the same vector, the mask will
2230 // contain only elements from the first one and the second one will be undef.
2231 if (N->getOperand(1).isUndef()) {
2232 assert(M0 < 4 && "Indexing into an undef vector?");
2233 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2234 return false;
2235
2236 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2237 Swap = false;
2238 return true;
2239 }
2240
2241 // Ensure each word index of the ShuffleVector Mask is consecutive.
2242 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2243 return false;
2244
2245 if (IsLE) {
2246 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2247 // Input vectors don't need to be swapped if the leading element
2248 // of the result is one of the 3 left elements of the second vector
2249 // (or if there is no shift to be done at all).
2250 Swap = false;
2251 ShiftElts = (8 - M0) % 8;
2252 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2253 // Input vectors need to be swapped if the leading element
2254 // of the result is one of the 3 left elements of the first vector
2255 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2256 Swap = true;
2257 ShiftElts = (4 - M0) % 4;
2258 }
2259
2260 return true;
2261 } else { // BE
2262 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2263 // Input vectors don't need to be swapped if the leading element
2264 // of the result is one of the 4 elements of the first vector.
2265 Swap = false;
2266 ShiftElts = M0;
2267 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2268 // Input vectors need to be swapped if the leading element
2269 // of the result is one of the 4 elements of the right vector.
2270 Swap = true;
2271 ShiftElts = M0 - 4;
2272 }
2273
2274 return true;
2275 }
2276}
2277
2279 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2280
2281 if (!isNByteElemShuffleMask(N, Width, -1))
2282 return false;
2283
2284 for (int i = 0; i < 16; i += Width)
2285 if (N->getMaskElt(i) != i + Width - 1)
2286 return false;
2287
2288 return true;
2289}
2290
2294
2298
2302
2306
2307/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2308/// if the inputs to the instruction should be swapped and set \p DM to the
2309/// value for the immediate.
2310/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2311/// AND element 0 of the result comes from the first input (LE) or second input
2312/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2313/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2314/// mask.
2316 bool &Swap, bool IsLE) {
2317 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2318
2319 // Ensure each byte index of the double word is consecutive.
2320 if (!isNByteElemShuffleMask(N, 8, 1))
2321 return false;
2322
2323 unsigned M0 = N->getMaskElt(0) / 8;
2324 unsigned M1 = N->getMaskElt(8) / 8;
2325 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2326
2327 // If both vector operands for the shuffle are the same vector, the mask will
2328 // contain only elements from the first one and the second one will be undef.
2329 if (N->getOperand(1).isUndef()) {
2330 if ((M0 | M1) < 2) {
2331 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2332 Swap = false;
2333 return true;
2334 } else
2335 return false;
2336 }
2337
2338 if (IsLE) {
2339 if (M0 > 1 && M1 < 2) {
2340 Swap = false;
2341 } else if (M0 < 2 && M1 > 1) {
2342 M0 = (M0 + 2) % 4;
2343 M1 = (M1 + 2) % 4;
2344 Swap = true;
2345 } else
2346 return false;
2347
2348 // Note: if control flow comes here that means Swap is already set above
2349 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2350 return true;
2351 } else { // BE
2352 if (M0 < 2 && M1 > 1) {
2353 Swap = false;
2354 } else if (M0 > 1 && M1 < 2) {
2355 M0 = (M0 + 2) % 4;
2356 M1 = (M1 + 2) % 4;
2357 Swap = true;
2358 } else
2359 return false;
2360
2361 // Note: if control flow comes here that means Swap is already set above
2362 DM = (M0 << 1) + (M1 & 1);
2363 return true;
2364 }
2365}
2366
2367
2368/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2369/// appropriate for PPC mnemonics (which have a big endian bias - namely
2370/// elements are counted from the left of the vector register).
2371unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2372 SelectionDAG &DAG) {
2374 assert(isSplatShuffleMask(SVOp, EltSize));
2375 EVT VT = SVOp->getValueType(0);
2376
2377 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2378 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2379 : SVOp->getMaskElt(0);
2380
2381 if (DAG.getDataLayout().isLittleEndian())
2382 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2383 else
2384 return SVOp->getMaskElt(0) / EltSize;
2385}
2386
2387/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2388/// by using a vspltis[bhw] instruction of the specified element size, return
2389/// the constant being splatted. The ByteSize field indicates the number of
2390/// bytes of each element [124] -> [bhw].
2392 SDValue OpVal;
2393
2394 // If ByteSize of the splat is bigger than the element size of the
2395 // build_vector, then we have a case where we are checking for a splat where
2396 // multiple elements of the buildvector are folded together into a single
2397 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2398 unsigned EltSize = 16/N->getNumOperands();
2399 if (EltSize < ByteSize) {
2400 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2401 SDValue UniquedVals[4];
2402 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2403
2404 // See if all of the elements in the buildvector agree across.
2405 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2406 if (N->getOperand(i).isUndef()) continue;
2407 // If the element isn't a constant, bail fully out.
2408 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2409
2410 if (!UniquedVals[i&(Multiple-1)].getNode())
2411 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2412 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2413 return SDValue(); // no match.
2414 }
2415
2416 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2417 // either constant or undef values that are identical for each chunk. See
2418 // if these chunks can form into a larger vspltis*.
2419
2420 // Check to see if all of the leading entries are either 0 or -1. If
2421 // neither, then this won't fit into the immediate field.
2422 bool LeadingZero = true;
2423 bool LeadingOnes = true;
2424 for (unsigned i = 0; i != Multiple-1; ++i) {
2425 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2426
2427 LeadingZero &= isNullConstant(UniquedVals[i]);
2428 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2429 }
2430 // Finally, check the least significant entry.
2431 if (LeadingZero) {
2432 if (!UniquedVals[Multiple-1].getNode())
2433 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2434 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2435 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2436 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2437 }
2438 if (LeadingOnes) {
2439 if (!UniquedVals[Multiple-1].getNode())
2440 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2441 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2442 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2443 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2444 }
2445
2446 return SDValue();
2447 }
2448
2449 // Check to see if this buildvec has a single non-undef value in its elements.
2450 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2451 if (N->getOperand(i).isUndef()) continue;
2452 if (!OpVal.getNode())
2453 OpVal = N->getOperand(i);
2454 else if (OpVal != N->getOperand(i))
2455 return SDValue();
2456 }
2457
2458 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2459
2460 unsigned ValSizeInBytes = EltSize;
2461 uint64_t Value = 0;
2462 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2463 Value = CN->getZExtValue();
2464 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2465 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2466 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2467 }
2468
2469 // If the splat value is larger than the element value, then we can never do
2470 // this splat. The only case that we could fit the replicated bits into our
2471 // immediate field for would be zero, and we prefer to use vxor for it.
2472 if (ValSizeInBytes < ByteSize) return SDValue();
2473
2474 // If the element value is larger than the splat value, check if it consists
2475 // of a repeated bit pattern of size ByteSize.
2476 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2477 return SDValue();
2478
2479 // Properly sign extend the value.
2480 int MaskVal = SignExtend32(Value, ByteSize * 8);
2481
2482 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2483 if (MaskVal == 0) return SDValue();
2484
2485 // Finally, if this value fits in a 5 bit sext field, return it
2486 if (SignExtend32<5>(MaskVal) == MaskVal)
2487 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2488 return SDValue();
2489}
2490
2491//===----------------------------------------------------------------------===//
2492// Addressing Mode Selection
2493//===----------------------------------------------------------------------===//
2494
2495/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2496/// or 64-bit immediate, and if the value can be accurately represented as a
2497/// sign extension from a 16-bit value. If so, this returns true and the
2498/// immediate.
2499bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2500 if (!isa<ConstantSDNode>(N))
2501 return false;
2502
2503 Imm = (int16_t)N->getAsZExtVal();
2504 if (N->getValueType(0) == MVT::i32)
2505 return Imm == (int32_t)N->getAsZExtVal();
2506 else
2507 return Imm == (int64_t)N->getAsZExtVal();
2508}
  // SDValue convenience overload: forward to the SDNode version above.
  return isIntS16Immediate(Op.getNode(), Imm);
}
2512
2513/// Used when computing address flags for selecting loads and stores.
2514/// If we have an OR, check if the LHS and RHS are provably disjoint.
2515/// An OR of two provably disjoint values is equivalent to an ADD.
2516/// Most PPC load/store instructions compute the effective address as a sum,
2517/// so doing this conversion is useful.
2518static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2519 if (N.getOpcode() != ISD::OR)
2520 return false;
2521 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2522 if (!LHSKnown.Zero.getBoolValue())
2523 return false;
2524 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2525 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2526}
2527
2528/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2529/// be represented as an indexed [r+r] operation.
                                               SDValue &Index,
                                               SelectionDAG &DAG) const {
  // Walk the users of this address. An [r+r] (EVX) form is chosen only when
  // some memory user accesses an f64 — per the caller, SPE f64 load/store
  // cannot encode the usual 16-bit displacement, so force reg+reg here.
  for (SDNode *U : N->users()) {
    if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
      if (Memop->getMemoryVT() == MVT::f64) {
        // Split the ADD computed earlier into its two register operands.
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }
  return false;
}
2544
2545/// isIntS34Immediate - This method tests if value of node given can be
2546/// accurately represented as a sign extension from a 34-bit value. If so,
2547/// this returns true and the immediate.
2548bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2549 if (!isa<ConstantSDNode>(N))
2550 return false;
2551
2552 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2553 return isInt<34>(Imm);
2554}
  // SDValue convenience overload: forward to the SDNode version above.
  return isIntS34Immediate(Op.getNode(), Imm);
}
2558
2559/// SelectAddressRegReg - Given the specified addressed, check to see if it
2560/// can be represented as an indexed [r+r] operation. Returns false if it
2561/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2562/// non-zero and N can be represented by a base register plus a signed 16-bit
2563/// displacement, make a more precise judgement by checking (displacement % \p
2564/// EncodingAlignment).
2566 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2567 MaybeAlign EncodingAlignment) const {
2568 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2569 // a [pc+imm].
2571 return false;
2572
2573 int16_t Imm = 0;
2574 if (N.getOpcode() == ISD::ADD) {
2575 // Is there any SPE load/store (f64), which can't handle 16bit offset?
2576 // SPE load/store can only handle 8-bit offsets.
2577 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2578 return true;
2579 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2580 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2581 return false; // r+i
2582 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2583 return false; // r+i
2584
2585 Base = N.getOperand(0);
2586 Index = N.getOperand(1);
2587 return true;
2588 } else if (N.getOpcode() == ISD::OR) {
2589 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2590 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2591 return false; // r+i can fold it if we can.
2592
2593 // If this is an or of disjoint bitfields, we can codegen this as an add
2594 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2595 // disjoint.
2596 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2597
2598 if (LHSKnown.Zero.getBoolValue()) {
2599 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2600 // If all of the bits are known zero on the LHS or RHS, the add won't
2601 // carry.
2602 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2603 Base = N.getOperand(0);
2604 Index = N.getOperand(1);
2605 return true;
2606 }
2607 }
2608 }
2609
2610 return false;
2611}
2612
2613// If we happen to be doing an i64 load or store into a stack slot that has
2614// less than a 4-byte alignment, then the frame-index elimination may need to
2615// use an indexed load or store instruction (because the offset may not be a
2616// multiple of 4). The extra register needed to hold the offset comes from the
2617// register scavenger, and it is possible that the scavenger will need to use
2618// an emergency spill slot. As a result, we need to make sure that a spill slot
2619// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2620// stack slot.
2621static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2622 // FIXME: This does not handle the LWA case.
2623 if (VT != MVT::i64)
2624 return;
2625
2626 // NOTE: We'll exclude negative FIs here, which come from argument
2627 // lowering, because there are no known test cases triggering this problem
2628 // using packed structures (or similar). We can remove this exclusion if
2629 // we find such a test case. The reason why this is so test-case driven is
2630 // because this entire 'fixup' is only to prevent crashes (from the
2631 // register scavenger) on not-really-valid inputs. For example, if we have:
2632 // %a = alloca i1
2633 // %b = bitcast i1* %a to i64*
2634 // store i64* a, i64 b
2635 // then the store should really be marked as 'align 1', but is not. If it
2636 // were marked as 'align 1' then the indexed form would have been
2637 // instruction-selected initially, and the problem this 'fixup' is preventing
2638 // won't happen regardless.
2639 if (FrameIdx < 0)
2640 return;
2641
2643 MachineFrameInfo &MFI = MF.getFrameInfo();
2644
2645 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2646 return;
2647
2648 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2649 FuncInfo->setHasNonRISpills();
2650}
2651
2652/// Returns true if the address N can be represented by a base register plus
2653/// a signed 16-bit displacement [r+imm], and if it is not better
2654/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2655/// displacements that are multiples of that value.
2657 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2658 MaybeAlign EncodingAlignment) const {
2659 // FIXME dl should come from parent load or store, not from address
2660 SDLoc dl(N);
2661
2662 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2663 // a [pc+imm].
2665 return false;
2666
2667 // If this can be more profitably realized as r+r, fail.
2668 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2669 return false;
2670
2671 if (N.getOpcode() == ISD::ADD) {
2672 int16_t imm = 0;
2673 if (isIntS16Immediate(N.getOperand(1), imm) &&
2674 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2675 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2676 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2677 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2678 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2679 } else {
2680 Base = N.getOperand(0);
2681 }
2682 return true; // [r+i]
2683 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2684 // Match LOAD (ADD (X, Lo(G))).
2685 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2686 "Cannot handle constant offsets yet!");
2687 Disp = N.getOperand(1).getOperand(0); // The global address.
2692 Base = N.getOperand(0);
2693 return true; // [&g+r]
2694 }
2695 } else if (N.getOpcode() == ISD::OR) {
2696 int16_t imm = 0;
2697 if (isIntS16Immediate(N.getOperand(1), imm) &&
2698 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2699 // If this is an or of disjoint bitfields, we can codegen this as an add
2700 // (for better address arithmetic) if the LHS and RHS of the OR are
2701 // provably disjoint.
2702 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2703
2704 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2705 // If all of the bits are known zero on the LHS or RHS, the add won't
2706 // carry.
2707 if (FrameIndexSDNode *FI =
2708 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2709 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2710 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2711 } else {
2712 Base = N.getOperand(0);
2713 }
2714 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2715 return true;
2716 }
2717 }
2718 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2719 // Loading from a constant address.
2720
2721 // If this address fits entirely in a 16-bit sext immediate field, codegen
2722 // this as "d, 0"
2723 int16_t Imm;
2724 if (isIntS16Immediate(CN, Imm) &&
2725 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2726 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2727 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2728 CN->getValueType(0));
2729 return true;
2730 }
2731
2732 // Handle 32-bit sext immediates with LIS + addr mode.
2733 if ((CN->getValueType(0) == MVT::i32 ||
2734 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2735 (!EncodingAlignment ||
2736 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2737 int Addr = (int)CN->getZExtValue();
2738
2739 // Otherwise, break this down into an LIS + disp.
2740 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2741
2742 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2743 MVT::i32);
2744 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2745 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2746 return true;
2747 }
2748 }
2749
2750 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2752 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2753 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2754 } else
2755 Base = N;
2756 return true; // [r+0]
2757}
2758
2759/// Similar to the 16-bit case but for instructions that take a 34-bit
2760/// displacement field (prefixed loads/stores).
                                              SDValue &Base,
                                              SelectionDAG &DAG) const {
  // Only on 64-bit targets.
  if (N.getValueType() != MVT::i64)
    return false;

  SDLoc dl(N);
  int64_t Imm = 0;

  // add (x, imm34) -> Base = x, Disp = imm34.
  if (N.getOpcode() == ISD::ADD) {
    if (!isIntS34Immediate(N.getOperand(1), Imm))
      return false;
    Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
    // Use a target frame index so frame-index elimination can rewrite it.
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    else
      Base = N.getOperand(0);
    return true;
  }

  // or (x, imm34) can be treated the same way when it cannot carry.
  if (N.getOpcode() == ISD::OR) {
    if (!isIntS34Immediate(N.getOperand(1), Imm))
      return false;
    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are
    // provably disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
    if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
      return false;
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType())
      ;
    else
      Base = N.getOperand(0);
    Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
    return true;
  }

  if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
    Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
    // ZERO8 as the base reads as literal zero in the effective address.
    Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
    return true;
  }

  return false;
}
2807
2808/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2809/// represented as an indexed [r+r] operation.
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address. This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add. However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register. We only get rid of the add if it is not an add of a
  // value and a 16-bit signed constant and both have a single use.
  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), imm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  // This function always succeeds: the whole value becomes the index.
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}
2839
2840template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2841 Ty *PCRelCand = dyn_cast<Ty>(N);
2842 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2843}
2844
2845/// Returns true if this address is a PC Relative address.
2846/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2847/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2849 // This is a materialize PC Relative node. Always select this as PC Relative.
2850 Base = N;
2851 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2852 return true;
2857 return true;
2858 return false;
2859}
2860
2861/// Returns true if we should use a direct load into vector instruction
2862/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2863static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2864
2865 // If there are any other uses other than scalar to vector, then we should
2866 // keep it as a scalar load -> direct move pattern to prevent multiple
2867 // loads.
2869 if (!LD)
2870 return false;
2871
2872 EVT MemVT = LD->getMemoryVT();
2873 if (!MemVT.isSimple())
2874 return false;
2875 switch(MemVT.getSimpleVT().SimpleTy) {
2876 case MVT::i64:
2877 break;
2878 case MVT::i32:
2879 if (!ST.hasP8Vector())
2880 return false;
2881 break;
2882 case MVT::i16:
2883 case MVT::i8:
2884 if (!ST.hasP9Vector())
2885 return false;
2886 break;
2887 default:
2888 return false;
2889 }
2890
2891 SDValue LoadedVal(N, 0);
2892 if (!LoadedVal.hasOneUse())
2893 return false;
2894
2895 for (SDUse &Use : LD->uses())
2896 if (Use.getResNo() == 0 &&
2897 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2898 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2899 return false;
2900
2901 return true;
2902}
2903
2904/// getPreIndexedAddressParts - returns true by value, base pointer and
2905/// offset pointer and addressing mode by reference if the node's address
2906/// can be legally represented as pre-indexed load / store address.
2908 SDValue &Offset,
2910 SelectionDAG &DAG) const {
2911 if (DisablePPCPreinc) return false;
2912
2913 bool isLoad = true;
2914 SDValue Ptr;
2915 EVT VT;
2916 Align Alignment;
2917 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2918 Ptr = LD->getBasePtr();
2919 VT = LD->getMemoryVT();
2920 Alignment = LD->getAlign();
2921 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2922 Ptr = ST->getBasePtr();
2923 VT = ST->getMemoryVT();
2924 Alignment = ST->getAlign();
2925 isLoad = false;
2926 } else
2927 return false;
2928
2929 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2930 // instructions because we can fold these into a more efficient instruction
2931 // instead, (such as LXSD).
2932 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2933 return false;
2934 }
2935
2936 // PowerPC doesn't have preinc load/store instructions for vectors
2937 if (VT.isVector())
2938 return false;
2939
2940 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2941 // Common code will reject creating a pre-inc form if the base pointer
2942 // is a frame index, or if N is a store and the base pointer is either
2943 // the same as or a predecessor of the value being stored. Check for
2944 // those situations here, and try with swapped Base/Offset instead.
2945 bool Swap = false;
2946
2948 Swap = true;
2949 else if (!isLoad) {
2950 SDValue Val = cast<StoreSDNode>(N)->getValue();
2951 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2952 Swap = true;
2953 }
2954
2955 if (Swap)
2957
2958 AM = ISD::PRE_INC;
2959 return true;
2960 }
2961
2962 // LDU/STU can only handle immediates that are a multiple of 4.
2963 if (VT != MVT::i64) {
2964 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
2965 return false;
2966 } else {
2967 // LDU/STU need an address with at least 4-byte alignment.
2968 if (Alignment < Align(4))
2969 return false;
2970
2971 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2972 return false;
2973 }
2974
2975 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2976 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2977 // sext i32 to i64 when addr mode is r+i.
2978 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2979 LD->getExtensionType() == ISD::SEXTLOAD &&
2981 return false;
2982 }
2983
2984 AM = ISD::PRE_INC;
2985 return true;
2986}
2987
2988//===----------------------------------------------------------------------===//
2989// LowerOperation implementation
2990//===----------------------------------------------------------------------===//
2991
2992/// Return true if we should reference labels using a PICBase, set the HiOpFlags
2993/// and LoOpFlags to the target MO flags.
2994static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2995 unsigned &HiOpFlags, unsigned &LoOpFlags,
2996 const GlobalValue *GV = nullptr) {
2997 HiOpFlags = PPCII::MO_HA;
2998 LoOpFlags = PPCII::MO_LO;
2999
3000 // Don't use the pic base if not in PIC relocation model.
3001 if (IsPIC) {
3002 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3003 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3004 }
3005}
3006
3007static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3008 SelectionDAG &DAG) {
3009 SDLoc DL(HiPart);
3010 EVT PtrVT = HiPart.getValueType();
3011 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3012
3013 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3014 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3015
3016 // With PIC, the first instruction is actually "GR+hi(&G)".
3017 if (isPIC)
3018 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3019 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3020
3021 // Generate non-pic code that has direct accesses to the constant pool.
3022 // The address of the global is just (hi(&g)+lo(&g)).
3023 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3024}
3025
3027 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3028 FuncInfo->setUsesTOCBasePtr();
3029}
3030
3034
3035SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3036 SDValue GA) const {
3037 EVT VT = Subtarget.getScalarIntVT();
3038 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3039 : Subtarget.isAIXABI()
3040 ? DAG.getRegister(PPC::R2, VT)
3041 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3042 SDValue Ops[] = { GA, Reg };
3043 return DAG.getMemIntrinsicNode(
3044 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3047}
3048
3049SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3050 SelectionDAG &DAG) const {
3051 EVT PtrVT = Op.getValueType();
3052 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3053 const Constant *C = CP->getConstVal();
3054
3055 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3056 // The actual address of the GlobalValue is stored in the TOC.
3057 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3058 if (Subtarget.isUsingPCRelativeCalls()) {
3059 SDLoc DL(CP);
3060 EVT Ty = getPointerTy(DAG.getDataLayout());
3061 SDValue ConstPool = DAG.getTargetConstantPool(
3062 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3063 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3064 }
3065 setUsesTOCBasePtr(DAG);
3066 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3067 return getTOCEntry(DAG, SDLoc(CP), GA);
3068 }
3069
3070 unsigned MOHiFlag, MOLoFlag;
3071 bool IsPIC = isPositionIndependent();
3072 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3073
3074 if (IsPIC && Subtarget.isSVR4ABI()) {
3075 SDValue GA =
3077 return getTOCEntry(DAG, SDLoc(CP), GA);
3078 }
3079
3080 SDValue CPIHi =
3081 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3082 SDValue CPILo =
3083 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3084 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3085}
3086
3087// For 64-bit PowerPC, prefer the more compact relative encodings.
3088// This trades 32 bits per jump table entry for one or two instructions
3089// on the jump site.
3096
3099 return false;
3100 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3101 return true;
3103}
3104
3106 SelectionDAG &DAG) const {
3107 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3109
3110 switch (getTargetMachine().getCodeModel()) {
3111 case CodeModel::Small:
3112 case CodeModel::Medium:
3114 default:
3115 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3117 }
3118}
3119
3120const MCExpr *
3122 unsigned JTI,
3123 MCContext &Ctx) const {
3124 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3126
3127 switch (getTargetMachine().getCodeModel()) {
3128 case CodeModel::Small:
3129 case CodeModel::Medium:
3131 default:
3132 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3133 }
3134}
3135
3136SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3137 EVT PtrVT = Op.getValueType();
3139
3140 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3141 if (Subtarget.isUsingPCRelativeCalls()) {
3142 SDLoc DL(JT);
3143 EVT Ty = getPointerTy(DAG.getDataLayout());
3144 SDValue GA =
3146 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3147 return MatAddr;
3148 }
3149
3150 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3151 // The actual address of the GlobalValue is stored in the TOC.
3152 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3153 setUsesTOCBasePtr(DAG);
3154 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3155 return getTOCEntry(DAG, SDLoc(JT), GA);
3156 }
3157
3158 unsigned MOHiFlag, MOLoFlag;
3159 bool IsPIC = isPositionIndependent();
3160 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3161
3162 if (IsPIC && Subtarget.isSVR4ABI()) {
3163 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3165 return getTOCEntry(DAG, SDLoc(GA), GA);
3166 }
3167
3168 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3169 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3170 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3171}
3172
3173SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3174 SelectionDAG &DAG) const {
3175 EVT PtrVT = Op.getValueType();
3176 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3177 const BlockAddress *BA = BASDN->getBlockAddress();
3178
3179 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3180 if (Subtarget.isUsingPCRelativeCalls()) {
3181 SDLoc DL(BASDN);
3182 EVT Ty = getPointerTy(DAG.getDataLayout());
3183 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3185 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3186 return MatAddr;
3187 }
3188
3189 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3190 // The actual BlockAddress is stored in the TOC.
3191 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3192 setUsesTOCBasePtr(DAG);
3193 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3194 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3195 }
3196
3197 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3198 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3199 return getTOCEntry(
3200 DAG, SDLoc(BASDN),
3201 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3202
3203 unsigned MOHiFlag, MOLoFlag;
3204 bool IsPIC = isPositionIndependent();
3205 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3206 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3207 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3208 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3209}
3210
3211SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3212 SelectionDAG &DAG) const {
3213 if (Subtarget.isAIXABI())
3214 return LowerGlobalTLSAddressAIX(Op, DAG);
3215
3216 return LowerGlobalTLSAddressLinux(Op, DAG);
3217}
3218
3219/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3220/// and then apply the update.
3222 SelectionDAG &DAG,
3223 const TargetMachine &TM) {
3224 // Initialize TLS model opt setting lazily:
3225 // (1) Use initial-exec for single TLS var references within current function.
3226 // (2) Use local-dynamic for multiple TLS var references within current
3227 // function.
3228 PPCFunctionInfo *FuncInfo =
3230 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3232 // Iterate over all instructions within current function, collect all TLS
3233 // global variables (global variables taken as the first parameter to
3234 // Intrinsic::threadlocal_address).
3235 const Function &Func = DAG.getMachineFunction().getFunction();
3236 for (const BasicBlock &BB : Func)
3237 for (const Instruction &I : BB)
3238 if (I.getOpcode() == Instruction::Call)
3239 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3240 if (Function *CF = CI->getCalledFunction())
3241 if (CF->isDeclaration() &&
3242 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3243 if (const GlobalValue *GV =
3244 dyn_cast<GlobalValue>(I.getOperand(0))) {
3245 TLSModel::Model GVModel = TM.getTLSModel(GV);
3246 if (GVModel == TLSModel::LocalDynamic)
3247 TLSGV.insert(GV);
3248 }
3249
3250 unsigned TLSGVCnt = TLSGV.size();
3251 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3252 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3253 FuncInfo->setAIXFuncUseTLSIEForLD();
3255 }
3256
3257 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3258 LLVM_DEBUG(
3259 dbgs() << DAG.getMachineFunction().getName()
3260 << " function is using the TLS-IE model for TLS-LD access.\n");
3261 Model = TLSModel::InitialExec;
3262 }
3263}
3264
3265SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3266 SelectionDAG &DAG) const {
3267 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3268
3269 if (DAG.getTarget().useEmulatedTLS())
3270 report_fatal_error("Emulated TLS is not yet supported on AIX");
3271
3272 SDLoc dl(GA);
3273 const GlobalValue *GV = GA->getGlobal();
3274 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3275 bool Is64Bit = Subtarget.isPPC64();
3277
3278 // Apply update to the TLS model.
3279 if (Subtarget.hasAIXShLibTLSModelOpt())
3281
3282 // TLS variables are accessed through TOC entries.
3283 // To support this, set the DAG to use the TOC base pointer.
3284 setUsesTOCBasePtr(DAG);
3285
3286 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3287
3288 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3289 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3290 bool HasAIXSmallTLSGlobalAttr = false;
3291 SDValue VariableOffsetTGA =
3292 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3293 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3294 SDValue TLSReg;
3295
3296 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3297 if (GVar->hasAttribute("aix-small-tls"))
3298 HasAIXSmallTLSGlobalAttr = true;
3299
3300 if (Is64Bit) {
3301 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3302 // involves a load of the variable offset (from the TOC), followed by an
3303 // add of the loaded variable offset to R13 (the thread pointer).
3304 // This code sequence looks like:
3305 // ld reg1,var[TC](2)
3306 // add reg2, reg1, r13 // r13 contains the thread pointer
3307 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3308
3309 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3310 // global variable attribute, produce a faster access sequence for
3311 // local-exec TLS variables where the offset from the TLS base is encoded
3312 // as an immediate operand.
3313 //
3314 // We only utilize the faster local-exec access sequence when the TLS
3315 // variable has a size within the policy limit. We treat types that are
3316 // not sized or are empty as being over the policy size limit.
3317 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3318 IsTLSLocalExecModel) {
3319 Type *GVType = GV->getValueType();
3320 if (GVType->isSized() && !GVType->isEmptyTy() &&
3321 GV->getDataLayout().getTypeAllocSize(GVType) <=
3323 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3324 }
3325 } else {
3326 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3327 // involves loading the variable offset from the TOC, generating a call to
3328 // .__get_tpointer to get the thread pointer (which will be in R3), and
3329 // adding the two together:
3330 // lwz reg1,var[TC](2)
3331 // bla .__get_tpointer
3332 // add reg2, reg1, r3
3333 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3334
3335 // We do not implement the 32-bit version of the faster access sequence
3336 // for local-exec that is controlled by the -maix-small-local-exec-tls
3337 // option, or the "aix-small-tls" global variable attribute.
3338 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3339 report_fatal_error("The small-local-exec TLS access sequence is "
3340 "currently only supported on AIX (64-bit mode).");
3341 }
3342 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3343 }
3344
3345 if (Model == TLSModel::LocalDynamic) {
3346 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3347
3348 // We do not implement the 32-bit version of the faster access sequence
3349 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3350 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3351 report_fatal_error("The small-local-dynamic TLS access sequence is "
3352 "currently only supported on AIX (64-bit mode).");
3353
3354 // For local-dynamic on AIX, we need to generate one TOC entry for each
3355 // variable offset, and a single module-handle TOC entry for the entire
3356 // file.
3357
3358 SDValue VariableOffsetTGA =
3359 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3360 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3361
3363 GlobalVariable *TLSGV =
3364 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3365 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3367 assert(TLSGV && "Not able to create GV for _$TLSML.");
3368 SDValue ModuleHandleTGA =
3369 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3370 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3371 SDValue ModuleHandle =
3372 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3373
3374 // With the -maix-small-local-dynamic-tls option, produce a faster access
3375 // sequence for local-dynamic TLS variables where the offset from the
3376 // module-handle is encoded as an immediate operand.
3377 //
3378 // We only utilize the faster local-dynamic access sequence when the TLS
3379 // variable has a size within the policy limit. We treat types that are
3380 // not sized or are empty as being over the policy size limit.
3381 if (HasAIXSmallLocalDynamicTLS) {
3382 Type *GVType = GV->getValueType();
3383 if (GVType->isSized() && !GVType->isEmptyTy() &&
3384 GV->getDataLayout().getTypeAllocSize(GVType) <=
3386 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3387 ModuleHandle);
3388 }
3389
3390 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3391 }
3392
3393 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3394 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3395 // need to generate two TOC entries, one for the variable offset, one for the
3396 // region handle. The global address for the TOC entry of the region handle is
3397 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3398 // entry of the variable offset is created with MO_TLSGD_FLAG.
3399 SDValue VariableOffsetTGA =
3400 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3401 SDValue RegionHandleTGA =
3402 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3403 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3404 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3405 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3406 RegionHandle);
3407}
3408
// Lower a GlobalTLSAddress node for Linux/ELF targets.  Selects the access
// sequence for the variable's TLS model (local-exec, initial-exec,
// general-dynamic, local-dynamic), with PC-relative variants on subtargets
// that support PC-relative addressing.
SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // Emulated TLS is lowered to runtime helper calls instead of native
  // TLS sequences.
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction().getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  const TargetMachine &TM = getTargetMachine();
  TLSModel::Model Model = TM.getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    // Local-exec: add a link-time-known offset to the thread pointer.
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
      SDValue MatAddr =
          DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
    }

    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
    // Thread pointer register: X13 on 64-bit, R2 on 32-bit.
    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
                             : DAG.getRegister(PPC::R2, MVT::i32);

    // Classic hi/lo pair relative to the thread pointer.
    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  if (Model == TLSModel::InitialExec) {
    // Initial-exec: load the thread-pointer-relative offset from the GOT,
    // then add the thread pointer (ADD_TLS with an MO_TLS-flagged operand).
    bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
        GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(
        GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
    SDValue TPOffset;
    if (IsPCRel) {
      // PC-relative: materialize the GOT slot address and load the offset.
      SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
      TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
                             MachinePointerInfo());
    } else {
      SDValue GOTPtr;
      if (is64bit) {
        setUsesTOCBasePtr(DAG);
        SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
        GOTPtr =
            DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
      } else {
        // 32-bit: choose the GOT access pattern by PIC level.
        if (!TM.isPositionIndependent())
          GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
        else if (picLevel == PICLevel::SmallPIC)
          GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
        else
          GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
      }
      TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
    }
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  if (Model == TLSModel::GeneralDynamic) {
    // General-dynamic: resolve the address through the TLSGD pseudo ops.
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
      return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

  if (Model == TLSModel::LocalDynamic) {
    // Local-dynamic: obtain the module's TLS base once, then add this
    // variable's DTP-relative offset.
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
      SDValue MatPCRel =
          DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}
3536
// Lower a GlobalAddress node.  On TOC-based ABIs (64-bit ELF, AIX) the
// address lives in (or is materialized relative to) the TOC/GOT; otherwise
// a hi/lo pair (possibly PIC-adjusted) is emitted.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      EVT Ty = getPointerTy(DAG.getDataLayout());
      SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
      SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
      // GOT-indirect: materialize the GOT slot address PC-relatively and
      // load the global's address from it.
      SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
                                 MachinePointerInfo());
      return Load;
    } else {
      // Directly materialize the address PC-relatively.
      SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
      return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    }
  }
  // Non-PC-relative: go through a TOC entry.
  setUsesTOCBasePtr(DAG);
  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
  return getTOCEntry(DAG, DL, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);

  // 32-bit PIC SVR4: globals are also accessed through a (BSS-)TOC entry.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                            GSDN->getOffset(),
    return getTOCEntry(DAG, DL, GA);
  }

  // Otherwise emit a hi/lo pair (adjusted for PIC inside LowerLabelRef).
  SDValue GAHi =
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
}
3585
3586SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3587 bool IsStrict = Op->isStrictFPOpcode();
3588 ISD::CondCode CC =
3589 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3590 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3591 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3592 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3593 EVT LHSVT = LHS.getValueType();
3594 SDLoc dl(Op);
3595
3596 // Soften the setcc with libcall if it is fp128.
3597 if (LHSVT == MVT::f128) {
3598 assert(!Subtarget.hasP9Vector() &&
3599 "SETCC for f128 is already legal under Power9!");
3600 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3601 Op->getOpcode() == ISD::STRICT_FSETCCS);
3602 if (RHS.getNode())
3603 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3604 DAG.getCondCode(CC));
3605 if (IsStrict)
3606 return DAG.getMergeValues({LHS, Chain}, dl);
3607 return LHS;
3608 }
3609
3610 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3611
3612 if (Op.getValueType() == MVT::v2i64) {
3613 // When the operands themselves are v2i64 values, we need to do something
3614 // special because VSX has no underlying comparison operations for these.
3615 if (LHS.getValueType() == MVT::v2i64) {
3616 // Equality can be handled by casting to the legal type for Altivec
3617 // comparisons, everything else needs to be expanded.
3618 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3619 return SDValue();
3620 SDValue SetCC32 = DAG.getSetCC(
3621 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3622 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3623 int ShuffV[] = {1, 0, 3, 2};
3624 SDValue Shuff =
3625 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3626 return DAG.getBitcast(MVT::v2i64,
3627 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3628 dl, MVT::v4i32, Shuff, SetCC32));
3629 }
3630
3631 // We handle most of these in the usual way.
3632 return Op;
3633 }
3634
3635 // If we're comparing for equality to zero, expose the fact that this is
3636 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3637 // fold the new nodes.
3638 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3639 return V;
3640
3641 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3642 // Leave comparisons against 0 and -1 alone for now, since they're usually
3643 // optimized. FIXME: revisit this when we can custom lower all setcc
3644 // optimizations.
3645 if (C->isAllOnes() || C->isZero())
3646 return SDValue();
3647 }
3648
3649 // If we have an integer seteq/setne, turn it into a compare against zero
3650 // by xor'ing the rhs with the lhs, which is faster than setting a
3651 // condition register, reading it back out, and masking the correct bit. The
3652 // normal approach here uses sub to do this instead of xor. Using xor exposes
3653 // the result to other bit-twiddling opportunities.
3654 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3655 EVT VT = Op.getValueType();
3656 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3657 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3658 }
3659 return SDValue();
3660}
3661
// Lower ISD::VAARG for 32-bit targets.  Follows the SVR4 va_list layout
// { char gpr; char fpr; char *overflow_arg_area; char *reg_save_area; }:
// the value is fetched either from the register save area or the overflow
// (stack) area, and the matching index/pointer fields are advanced.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index: byte 0 of the va_list, zero-extended to i32.
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // i64 arguments occupy an even/odd GPR pair, so the index must be even.
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  // reg_save_area is at offset 8, overflow_arg_area at offset 4.
  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // select overflow_area if index > 8
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  // Store the updated gpr (at offset 0) or fpr (at offset 1) index back.
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr > 8
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  // Write the (possibly advanced) overflow pointer back to the va_list.
  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  // Finally load the argument value from whichever area was selected.
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}
3760
3761SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3762 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3763
3764 // We have to copy the entire va_list struct:
3765 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3766 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3767 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3768 false, true, /*CI=*/nullptr, std::nullopt,
3769 MachinePointerInfo(), MachinePointerInfo());
3770}
3771
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // No adjustment of the trampoline is needed on PPC: forward operand 0
  // of the ADJUST_TRAMPOLINE node unchanged.
  return Op.getOperand(0);
}
3776
// Scan an inline-asm node's operands for a use/clobber of the link register
// (LR/LR8); if one is found, record that an LR store is required for this
// function.  The node itself is always returned unchanged.
SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();

  assert((Op.getOpcode() == ISD::INLINEASM ||
          Op.getOpcode() == ISD::INLINEASM_BR) &&
         "Expecting Inline ASM node.");

  // If an LR store is already known to be required then there is not point in
  // checking this ASM as well.
  if (MFI.isLRStoreRequired())
    return Op;

  // Inline ASM nodes have an optional last operand that is an incoming Flag of
  // type MVT::Glue. We want to ignore this last operand if that is the case.
  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
    --NumOps;

  // Check all operands that may contain the LR.
  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
    unsigned NumVals = Flags.getNumOperandRegisters();
    ++i; // Skip the ID value.

    switch (Flags.getKind()) {
    default:
      llvm_unreachable("Bad flags!");
        // Operand kinds that cannot reference LR: just skip their values.
        i += NumVals;
        break;
        // Register use/clobber operands: look for LR/LR8 among them.
        for (; NumVals; --NumVals, ++i) {
          Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
          if (Reg != PPC::LR && Reg != PPC::LR8)
            continue;
          MFI.setLRStoreRequired();
          return Op;
        }
        break;
      }
    }
  }

  return Op;
}
3827
// Lower ISD::INIT_TRAMPOLINE.  On AIX the trampoline is a function
// descriptor built in place from the callee's descriptor plus the nest
// argument; on ELF targets it is initialized by a call to the
// __trampoline_setup runtime helper.
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  if (Subtarget.isAIXABI()) {
    // On AIX we create a trampoline descriptor by combining the
    // entry point and TOC from the global descriptor (FPtr) with the
    // nest argument as the environment pointer.
    uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
    MaybeAlign PointerAlign(PointerSize);
    auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                        : MachineMemOperand::MONone;

    // Descriptor layout: [entry point, TOC pointer, environment pointer],
    // each one pointer-size wide.
    uint64_t TOCPointerOffset = 1 * PointerSize;
    uint64_t EnvPointerOffset = 2 * PointerSize;
    SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
    SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);

    const Value *TrampolineAddr =
        cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
    const Function *Func =
        cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());

    // One chain per field store; joined by a TokenFactor below.
    SDValue OutChains[3];

    // Copy the entry point address from the global descriptor to the
    // trampoline buffer.
    SDValue LoadEntryPoint =
        DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
                    PointerAlign, MMOFlags);
    SDValue EPLoadChain = LoadEntryPoint.getValue(1);
    OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
                                MachinePointerInfo(TrampolineAddr, 0));

    // Copy the TOC pointer from the global descriptor to the trampoline
    // buffer.
    SDValue TOCFromDescriptorPtr =
        DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
    SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
                                 MachinePointerInfo(Func, TOCPointerOffset),
                                 PointerAlign, MMOFlags);
    SDValue TrampolineTOCPointer =
        DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
    SDValue TOCLoadChain = TOCReg.getValue(1);
    OutChains[1] =
        DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
                     MachinePointerInfo(TrampolineAddr, TOCPointerOffset));

    // Store the nest argument into the environment pointer in the trampoline
    // buffer.
    SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
    OutChains[2] =
        DAG.getStore(Chain, dl, Nest, EnvPointer,
                     MachinePointerInfo(TrampolineAddr, EnvPointerOffset));

        DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
    return TokenFactor;
  }

  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

  Args.emplace_back(Trmp, IntPtrTy);
  // TrampSize == (isPPC64 ? 48 : 40);
  Args.emplace_back(
      DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
      IntPtrTy);
  Args.emplace_back(FPtr, IntPtrTy);
  Args.emplace_back(Nest, IntPtrTy);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}
3917
3918SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3919 MachineFunction &MF = DAG.getMachineFunction();
3920 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3921 EVT PtrVT = getPointerTy(MF.getDataLayout());
3922
3923 SDLoc dl(Op);
3924
3925 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3926 // vastart just stores the address of the VarArgsFrameIndex slot into the
3927 // memory location argument.
3928 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3929 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3930 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3931 MachinePointerInfo(SV));
3932 }
3933
3934 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3935 // We suppose the given va_list is already allocated.
3936 //
3937 // typedef struct {
3938 // char gpr; /* index into the array of 8 GPRs
3939 // * stored in the register save area
3940 // * gpr=0 corresponds to r3,
3941 // * gpr=1 to r4, etc.
3942 // */
3943 // char fpr; /* index into the array of 8 FPRs
3944 // * stored in the register save area
3945 // * fpr=0 corresponds to f1,
3946 // * fpr=1 to f2, etc.
3947 // */
3948 // char *overflow_arg_area;
3949 // /* location on stack that holds
3950 // * the next overflow argument
3951 // */
3952 // char *reg_save_area;
3953 // /* where r3:r10 and f1:f8 (if saved)
3954 // * are stored
3955 // */
3956 // } va_list[1];
3957
3958 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3959 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3960 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3961 PtrVT);
3962 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3963 PtrVT);
3964
3965 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3966 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3967
3968 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3969 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3970
3971 uint64_t FPROffset = 1;
3972 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3973
3974 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3975
3976 // Store first byte : number of int regs
3977 SDValue firstStore =
3978 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3979 MachinePointerInfo(SV), MVT::i8);
3980 uint64_t nextOffset = FPROffset;
3981 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3982 ConstFPROffset);
3983
3984 // Store second byte : number of float regs
3985 SDValue secondStore =
3986 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3987 MachinePointerInfo(SV, nextOffset), MVT::i8);
3988 nextOffset += StackOffset;
3989 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3990
3991 // Store second word : arguments given on stack
3992 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3993 MachinePointerInfo(SV, nextOffset));
3994 nextOffset += FrameOffset;
3995 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3996
3997 // Store third word : arguments given in registers
3998 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3999 MachinePointerInfo(SV, nextOffset));
4000}
4001
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX.  Thirteen registers, F1 through F13, in allocation
/// order.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
4007
4008/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4009/// the stack.
4010static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4011 unsigned PtrByteSize) {
4012 unsigned ArgSize = ArgVT.getStoreSize();
4013 if (Flags.isByVal())
4014 ArgSize = Flags.getByValSize();
4015
4016 // Round up to multiples of the pointer size, except for array members,
4017 // which are always packed.
4018 if (!Flags.isInConsecutiveRegs())
4019 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4020
4021 return ArgSize;
4022}
4023
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.  Defaults to pointer-size alignment; vector/f128 values,
/// over-aligned ByVal aggregates and split array members raise it.
                                     ISD::ArgFlagsTy Flags,
                                     unsigned PtrByteSize) {
  Align Alignment(PtrByteSize);

  // Altivec parameters are padded to a 16 byte boundary.
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
      ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
    Alignment = Align(16);

  // ByVal parameters are aligned as requested.
  if (Flags.isByVal()) {
    auto BVAlign = Flags.getNonZeroByValAlign();
    if (BVAlign > PtrByteSize) {
      // A requested ByVal alignment must itself be a pointer-size multiple.
      if (BVAlign.value() % PtrByteSize != 0)
          "ByVal alignment is not a multiple of the pointer size");

      Alignment = BVAlign;
    }
  }

  // Array members are always packed to their original alignment.
  if (Flags.isInConsecutiveRegs()) {
    // If the array member was split into multiple registers, the first
    // needs to be aligned to the size of the full type.  (Except for
    // ppcf128, which is only aligned as its f64 components.)
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
      Alignment = Align(OrigVT.getStoreSize());
    else
      Alignment = Align(ArgVT.getStoreSize());
  }

  return Alignment;
}
4063
4064/// CalculateStackSlotUsed - Return whether this argument will use its
4065/// stack slot (instead of being passed in registers). ArgOffset,
4066/// AvailableFPRs, and AvailableVRs must hold the current argument
4067/// position, and will be updated to account for this argument.
4068static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4069 unsigned PtrByteSize, unsigned LinkageSize,
4070 unsigned ParamAreaSize, unsigned &ArgOffset,
4071 unsigned &AvailableFPRs,
4072 unsigned &AvailableVRs) {
4073 bool UseMemory = false;
4074
4075 // Respect alignment of argument on the stack.
4076 Align Alignment =
4077 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4078 ArgOffset = alignTo(ArgOffset, Alignment);
4079 // If there's no space left in the argument save area, we must
4080 // use memory (this check also catches zero-sized arguments).
4081 if (ArgOffset >= LinkageSize + ParamAreaSize)
4082 UseMemory = true;
4083
4084 // Allocate argument on the stack.
4085 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4086 if (Flags.isInConsecutiveRegsLast())
4087 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4088 // If we overran the argument save area, we must use memory
4089 // (this check catches arguments passed partially in memory)
4090 if (ArgOffset > LinkageSize + ParamAreaSize)
4091 UseMemory = true;
4092
4093 // However, if the argument is actually passed in an FPR or a VR,
4094 // we don't use memory after all.
4095 if (!Flags.isByVal()) {
4096 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4097 if (AvailableFPRs > 0) {
4098 --AvailableFPRs;
4099 return false;
4100 }
4101 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4102 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4103 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4104 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4105 if (AvailableVRs > 0) {
4106 --AvailableVRs;
4107 return false;
4108 }
4109 }
4110
4111 return UseMemory;
4112}
4113
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target, as reported by the
/// frame lowering's getStackAlign().
                                unsigned NumBytes) {
  return alignTo(NumBytes, Lowering->getStackAlign());
}
4120
// Dispatch formal-argument lowering to the ABI-specific implementation:
// AIX, 64-bit ELF, or (the only remaining case, asserted) 32-bit ELF/SVR4.
SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (Subtarget.isAIXABI())
    return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                    InVals);
  if (Subtarget.is64BitELFABI())
    return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);
  assert(Subtarget.is32BitELFABI());
  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                     InVals);
}
4135
4136SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4137 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4138 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4139 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4140
4141 // 32-bit SVR4 ABI Stack Frame Layout:
4142 // +-----------------------------------+
4143 // +--> | Back chain |
4144 // | +-----------------------------------+
4145 // | | Floating-point register save area |
4146 // | +-----------------------------------+
4147 // | | General register save area |
4148 // | +-----------------------------------+
4149 // | | CR save word |
4150 // | +-----------------------------------+
4151 // | | VRSAVE save word |
4152 // | +-----------------------------------+
4153 // | | Alignment padding |
4154 // | +-----------------------------------+
4155 // | | Vector register save area |
4156 // | +-----------------------------------+
4157 // | | Local variable space |
4158 // | +-----------------------------------+
4159 // | | Parameter list area |
4160 // | +-----------------------------------+
4161 // | | LR save word |
4162 // | +-----------------------------------+
4163 // SP--> +--- | Back chain |
4164 // +-----------------------------------+
4165 //
4166 // Specifications:
4167 // System V Application Binary Interface PowerPC Processor Supplement
4168 // AltiVec Technology Programming Interface Manual
4169
4170 MachineFunction &MF = DAG.getMachineFunction();
4171 MachineFrameInfo &MFI = MF.getFrameInfo();
4172 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4173
4174 EVT PtrVT = getPointerTy(MF.getDataLayout());
4175 // Potential tail calls could cause overwriting of argument stack slots.
4176 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4177 (CallConv == CallingConv::Fast));
4178 const Align PtrAlign(4);
4179
4180 // Assign locations to all of the incoming arguments.
4182 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4183 *DAG.getContext());
4184
4185 // Reserve space for the linkage area on the stack.
4186 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4187 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4188 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4189
4190 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4191 CCValAssign &VA = ArgLocs[i];
4192
4193 // Arguments stored in registers.
4194 if (VA.isRegLoc()) {
4195 const TargetRegisterClass *RC;
4196 EVT ValVT = VA.getValVT();
4197
4198 switch (ValVT.getSimpleVT().SimpleTy) {
4199 default:
4200 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4201 case MVT::i1:
4202 case MVT::i32:
4203 RC = &PPC::GPRCRegClass;
4204 break;
4205 case MVT::f32:
4206 if (Subtarget.hasP8Vector())
4207 RC = &PPC::VSSRCRegClass;
4208 else if (Subtarget.hasSPE())
4209 RC = &PPC::GPRCRegClass;
4210 else
4211 RC = &PPC::F4RCRegClass;
4212 break;
4213 case MVT::f64:
4214 if (Subtarget.hasVSX())
4215 RC = &PPC::VSFRCRegClass;
4216 else if (Subtarget.hasSPE())
4217 // SPE passes doubles in GPR pairs.
4218 RC = &PPC::GPRCRegClass;
4219 else
4220 RC = &PPC::F8RCRegClass;
4221 break;
4222 case MVT::v16i8:
4223 case MVT::v8i16:
4224 case MVT::v4i32:
4225 RC = &PPC::VRRCRegClass;
4226 break;
4227 case MVT::v4f32:
4228 RC = &PPC::VRRCRegClass;
4229 break;
4230 case MVT::v2f64:
4231 case MVT::v2i64:
4232 RC = &PPC::VRRCRegClass;
4233 break;
4234 }
4235
4236 SDValue ArgValue;
4237 // Transform the arguments stored in physical registers into
4238 // virtual ones.
4239 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4240 assert(i + 1 < e && "No second half of double precision argument");
4241 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4242 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4243 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4244 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4245 if (!Subtarget.isLittleEndian())
4246 std::swap (ArgValueLo, ArgValueHi);
4247 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4248 ArgValueHi);
4249 } else {
4250 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4251 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4252 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4253 if (ValVT == MVT::i1)
4254 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4255 }
4256
4257 InVals.push_back(ArgValue);
4258 } else {
4259 // Argument stored in memory.
4260 assert(VA.isMemLoc());
4261
4262 // Get the extended size of the argument type in stack
4263 unsigned ArgSize = VA.getLocVT().getStoreSize();
4264 // Get the actual size of the argument type
4265 unsigned ObjSize = VA.getValVT().getStoreSize();
4266 unsigned ArgOffset = VA.getLocMemOffset();
4267 // Stack objects in PPC32 are right justified.
4268 ArgOffset += ArgSize - ObjSize;
4269 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4270
4271 // Create load nodes to retrieve arguments from the stack.
4272 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4273 InVals.push_back(
4274 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4275 }
4276 }
4277
4278 // Assign locations to all of the incoming aggregate by value arguments.
4279 // Aggregates passed by value are stored in the local variable space of the
4280 // caller's stack frame, right above the parameter list area.
4281 SmallVector<CCValAssign, 16> ByValArgLocs;
4282 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4283 ByValArgLocs, *DAG.getContext());
4284
4285 // Reserve stack space for the allocations in CCInfo.
4286 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4287
4288 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4289
4290 // Area that is at least reserved in the caller of this function.
4291 unsigned MinReservedArea = CCByValInfo.getStackSize();
4292 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4293
4294 // Set the size that is at least reserved in caller of this function. Tail
4295 // call optimized function's reserved stack space needs to be aligned so that
4296 // taking the difference between two stack areas will result in an aligned
4297 // stack.
4298 MinReservedArea =
4299 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4300 FuncInfo->setMinReservedArea(MinReservedArea);
4301
4303
4304 // If the function takes variable number of arguments, make a frame index for
4305 // the start of the first vararg value... for expansion of llvm.va_start.
4306 if (isVarArg) {
4307 static const MCPhysReg GPArgRegs[] = {
4308 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4309 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4310 };
4311 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4312
4313 static const MCPhysReg FPArgRegs[] = {
4314 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4315 PPC::F8
4316 };
4317 unsigned NumFPArgRegs = std::size(FPArgRegs);
4318
4319 if (useSoftFloat() || hasSPE())
4320 NumFPArgRegs = 0;
4321
4322 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4323 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4324
4325 // Make room for NumGPArgRegs and NumFPArgRegs.
4326 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4327 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4328
4330 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4331
4332 FuncInfo->setVarArgsFrameIndex(
4333 MFI.CreateStackObject(Depth, Align(8), false));
4334 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4335
4336 // The fixed integer arguments of a variadic function are stored to the
4337 // VarArgsFrameIndex on the stack so that they may be loaded by
4338 // dereferencing the result of va_next.
4339 for (MCPhysReg GPArgReg : GPArgRegs) {
4340 // Get an existing live-in vreg, or add a new one.
4341 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4342 if (!VReg)
4343 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4344
4345 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4346 SDValue Store =
4347 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4348 MemOps.push_back(Store);
4349 // Increment the address by four for the next argument to store
4350 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4351 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4352 }
4353
4354 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4355 // is set.
4356 // The double arguments are stored to the VarArgsFrameIndex
4357 // on the stack.
4358 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4359 // Get an existing live-in vreg, or add a new one.
4360 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4361 if (!VReg)
4362 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4363
4364 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4365 SDValue Store =
4366 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4367 MemOps.push_back(Store);
4368 // Increment the address by eight for the next argument to store
4369 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4370 PtrVT);
4371 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4372 }
4373 }
4374
4375 if (!MemOps.empty())
4376 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4377
4378 return Chain;
4379}
4380
4381// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4382// value to MVT::i64 and then truncate to the correct register size.
4383SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4384 EVT ObjectVT, SelectionDAG &DAG,
4385 SDValue ArgVal,
4386 const SDLoc &dl) const {
4387 if (Flags.isSExt())
4388 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4389 DAG.getValueType(ObjectVT));
4390 else if (Flags.isZExt())
4391 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4392 DAG.getValueType(ObjectVT));
4393
4394 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4395}
4396
4397SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4398 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4399 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4400 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4401 // TODO: add description of PPC stack frame format, or at least some docs.
4402 //
4403 bool isELFv2ABI = Subtarget.isELFv2ABI();
4404 bool isLittleEndian = Subtarget.isLittleEndian();
4405 MachineFunction &MF = DAG.getMachineFunction();
4406 MachineFrameInfo &MFI = MF.getFrameInfo();
4407 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4408
4409 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4410 "fastcc not supported on varargs functions");
4411
4412 EVT PtrVT = getPointerTy(MF.getDataLayout());
4413 // Potential tail calls could cause overwriting of argument stack slots.
4414 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4415 (CallConv == CallingConv::Fast));
4416 unsigned PtrByteSize = 8;
4417 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4418
4419 static const MCPhysReg GPR[] = {
4420 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4421 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4422 };
4423 static const MCPhysReg VR[] = {
4424 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4425 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4426 };
4427
4428 const unsigned Num_GPR_Regs = std::size(GPR);
4429 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4430 const unsigned Num_VR_Regs = std::size(VR);
4431
4432 // Do a first pass over the arguments to determine whether the ABI
4433 // guarantees that our caller has allocated the parameter save area
4434 // on its stack frame. In the ELFv1 ABI, this is always the case;
4435 // in the ELFv2 ABI, it is true if this is a vararg function or if
4436 // any parameter is located in a stack slot.
4437
4438 bool HasParameterArea = !isELFv2ABI || isVarArg;
4439 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4440 unsigned NumBytes = LinkageSize;
4441 unsigned AvailableFPRs = Num_FPR_Regs;
4442 unsigned AvailableVRs = Num_VR_Regs;
4443 for (const ISD::InputArg &In : Ins) {
4444 if (In.Flags.isNest())
4445 continue;
4446
4447 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4448 LinkageSize, ParamAreaSize, NumBytes,
4449 AvailableFPRs, AvailableVRs))
4450 HasParameterArea = true;
4451 }
4452
4453 // Add DAG nodes to load the arguments or copy them out of registers. On
4454 // entry to a function on PPC, the arguments start after the linkage area,
4455 // although the first ones are often in registers.
4456
4457 unsigned ArgOffset = LinkageSize;
4458 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4461 unsigned CurArgIdx = 0;
4462 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4463 SDValue ArgVal;
4464 bool needsLoad = false;
4465 EVT ObjectVT = Ins[ArgNo].VT;
4466 EVT OrigVT = Ins[ArgNo].ArgVT;
4467 unsigned ObjSize = ObjectVT.getStoreSize();
4468 unsigned ArgSize = ObjSize;
4469 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4470 if (Ins[ArgNo].isOrigArg()) {
4471 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4472 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4473 }
4474 // We re-align the argument offset for each argument, except when using the
4475 // fast calling convention, when we need to make sure we do that only when
4476 // we'll actually use a stack slot.
4477 unsigned CurArgOffset;
4478 Align Alignment;
4479 auto ComputeArgOffset = [&]() {
4480 /* Respect alignment of argument on the stack. */
4481 Alignment =
4482 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4483 ArgOffset = alignTo(ArgOffset, Alignment);
4484 CurArgOffset = ArgOffset;
4485 };
4486
4487 if (CallConv != CallingConv::Fast) {
4488 ComputeArgOffset();
4489
4490 /* Compute GPR index associated with argument offset. */
4491 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4492 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4493 }
4494
4495 // FIXME the codegen can be much improved in some cases.
4496 // We do not have to keep everything in memory.
4497 if (Flags.isByVal()) {
4498 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4499
4500 if (CallConv == CallingConv::Fast)
4501 ComputeArgOffset();
4502
4503 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4504 ObjSize = Flags.getByValSize();
4505 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4506 // Empty aggregate parameters do not take up registers. Examples:
4507 // struct { } a;
4508 // union { } b;
4509 // int c[0];
4510 // etc. However, we have to provide a place-holder in InVals, so
4511 // pretend we have an 8-byte item at the current address for that
4512 // purpose.
4513 if (!ObjSize) {
4514 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4515 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4516 InVals.push_back(FIN);
4517 continue;
4518 }
4519
4520 // Create a stack object covering all stack doublewords occupied
4521 // by the argument. If the argument is (fully or partially) on
4522 // the stack, or if the argument is fully in registers but the
4523 // caller has allocated the parameter save anyway, we can refer
4524 // directly to the caller's stack frame. Otherwise, create a
4525 // local copy in our own frame.
4526 int FI;
4527 if (HasParameterArea ||
4528 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4529 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4530 else
4531 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4532 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4533
4534 // Handle aggregates smaller than 8 bytes.
4535 if (ObjSize < PtrByteSize) {
4536 // The value of the object is its address, which differs from the
4537 // address of the enclosing doubleword on big-endian systems.
4538 SDValue Arg = FIN;
4539 if (!isLittleEndian) {
4540 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4541 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4542 }
4543 InVals.push_back(Arg);
4544
4545 if (GPR_idx != Num_GPR_Regs) {
4546 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4547 FuncInfo->addLiveInAttr(VReg, Flags);
4548 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4549 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4550 SDValue Store =
4551 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4552 MachinePointerInfo(&*FuncArg), ObjType);
4553 MemOps.push_back(Store);
4554 }
4555 // Whether we copied from a register or not, advance the offset
4556 // into the parameter save area by a full doubleword.
4557 ArgOffset += PtrByteSize;
4558 continue;
4559 }
4560
4561 // The value of the object is its address, which is the address of
4562 // its first stack doubleword.
4563 InVals.push_back(FIN);
4564
4565 // Store whatever pieces of the object are in registers to memory.
4566 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4567 if (GPR_idx == Num_GPR_Regs)
4568 break;
4569
4570 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4571 FuncInfo->addLiveInAttr(VReg, Flags);
4572 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4573 SDValue Addr = FIN;
4574 if (j) {
4575 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4576 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4577 }
4578 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4579 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4580 SDValue Store =
4581 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4582 MachinePointerInfo(&*FuncArg, j), ObjType);
4583 MemOps.push_back(Store);
4584 ++GPR_idx;
4585 }
4586 ArgOffset += ArgSize;
4587 continue;
4588 }
4589
4590 switch (ObjectVT.getSimpleVT().SimpleTy) {
4591 default: llvm_unreachable("Unhandled argument type!");
4592 case MVT::i1:
4593 case MVT::i32:
4594 case MVT::i64:
4595 if (Flags.isNest()) {
4596 // The 'nest' parameter, if any, is passed in R11.
4597 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4598 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4599
4600 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4601 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4602
4603 break;
4604 }
4605
4606 // These can be scalar arguments or elements of an integer array type
4607 // passed directly. Clang may use those instead of "byval" aggregate
4608 // types to avoid forcing arguments to memory unnecessarily.
4609 if (GPR_idx != Num_GPR_Regs) {
4610 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4611 FuncInfo->addLiveInAttr(VReg, Flags);
4612 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4613
4614 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4615 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4616 // value to MVT::i64 and then truncate to the correct register size.
4617 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4618 } else {
4619 if (CallConv == CallingConv::Fast)
4620 ComputeArgOffset();
4621
4622 needsLoad = true;
4623 ArgSize = PtrByteSize;
4624 }
4625 if (CallConv != CallingConv::Fast || needsLoad)
4626 ArgOffset += 8;
4627 break;
4628
4629 case MVT::f32:
4630 case MVT::f64:
4631 // These can be scalar arguments or elements of a float array type
4632 // passed directly. The latter are used to implement ELFv2 homogenous
4633 // float aggregates.
4634 if (FPR_idx != Num_FPR_Regs) {
4635 unsigned VReg;
4636
4637 if (ObjectVT == MVT::f32)
4638 VReg = MF.addLiveIn(FPR[FPR_idx],
4639 Subtarget.hasP8Vector()
4640 ? &PPC::VSSRCRegClass
4641 : &PPC::F4RCRegClass);
4642 else
4643 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4644 ? &PPC::VSFRCRegClass
4645 : &PPC::F8RCRegClass);
4646
4647 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4648 ++FPR_idx;
4649 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4650 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4651 // once we support fp <-> gpr moves.
4652
4653 // This can only ever happen in the presence of f32 array types,
4654 // since otherwise we never run out of FPRs before running out
4655 // of GPRs.
4656 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4657 FuncInfo->addLiveInAttr(VReg, Flags);
4658 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4659
4660 if (ObjectVT == MVT::f32) {
4661 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4662 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4663 DAG.getConstant(32, dl, MVT::i32));
4664 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4665 }
4666
4667 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4668 } else {
4669 if (CallConv == CallingConv::Fast)
4670 ComputeArgOffset();
4671
4672 needsLoad = true;
4673 }
4674
4675 // When passing an array of floats, the array occupies consecutive
4676 // space in the argument area; only round up to the next doubleword
4677 // at the end of the array. Otherwise, each float takes 8 bytes.
4678 if (CallConv != CallingConv::Fast || needsLoad) {
4679 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4680 ArgOffset += ArgSize;
4681 if (Flags.isInConsecutiveRegsLast())
4682 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4683 }
4684 break;
4685 case MVT::v4f32:
4686 case MVT::v4i32:
4687 case MVT::v8i16:
4688 case MVT::v16i8:
4689 case MVT::v2f64:
4690 case MVT::v2i64:
4691 case MVT::v1i128:
4692 case MVT::f128:
4693 // These can be scalar arguments or elements of a vector array type
4694 // passed directly. The latter are used to implement ELFv2 homogenous
4695 // vector aggregates.
4696 if (VR_idx != Num_VR_Regs) {
4697 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4698 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4699 ++VR_idx;
4700 } else {
4701 if (CallConv == CallingConv::Fast)
4702 ComputeArgOffset();
4703 needsLoad = true;
4704 }
4705 if (CallConv != CallingConv::Fast || needsLoad)
4706 ArgOffset += 16;
4707 break;
4708 }
4709
4710 // We need to load the argument to a virtual register if we determined
4711 // above that we ran out of physical registers of the appropriate type.
4712 if (needsLoad) {
4713 if (ObjSize < ArgSize && !isLittleEndian)
4714 CurArgOffset += ArgSize - ObjSize;
4715 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4716 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4717 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4718 }
4719
4720 InVals.push_back(ArgVal);
4721 }
4722
4723 // Area that is at least reserved in the caller of this function.
4724 unsigned MinReservedArea;
4725 if (HasParameterArea)
4726 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4727 else
4728 MinReservedArea = LinkageSize;
4729
4730 // Set the size that is at least reserved in caller of this function. Tail
4731 // call optimized functions' reserved stack space needs to be aligned so that
4732 // taking the difference between two stack areas will result in an aligned
4733 // stack.
4734 MinReservedArea =
4735 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4736 FuncInfo->setMinReservedArea(MinReservedArea);
4737
4738 // If the function takes variable number of arguments, make a frame index for
4739 // the start of the first vararg value... for expansion of llvm.va_start.
4740 // On ELFv2ABI spec, it writes:
4741 // C programs that are intended to be *portable* across different compilers
4742 // and architectures must use the header file <stdarg.h> to deal with variable
4743 // argument lists.
4744 if (isVarArg && MFI.hasVAStart()) {
4745 int Depth = ArgOffset;
4746
4747 FuncInfo->setVarArgsFrameIndex(
4748 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4749 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4750
4751 // If this function is vararg, store any remaining integer argument regs
4752 // to their spots on the stack so that they may be loaded by dereferencing
4753 // the result of va_next.
4754 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4755 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4756 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4757 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4758 SDValue Store =
4759 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4760 MemOps.push_back(Store);
4761 // Increment the address by four for the next argument to store
4762 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4763 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4764 }
4765 }
4766
4767 if (!MemOps.empty())
4768 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4769
4770 return Chain;
4771}
4772
4773/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4774/// adjusted to accommodate the arguments for the tailcall.
4775static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4776 unsigned ParamSize) {
4777
4778 if (!isTailCall) return 0;
4779
4781 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4782 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4783 // Remember only if the new adjustment is bigger.
4784 if (SPDiff < FI->getTailCallSPDelta())
4785 FI->setTailCallSPDelta(SPDiff);
4786
4787 return SPDiff;
4788}
4789
4790static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4791
4792static bool callsShareTOCBase(const Function *Caller,
4793 const GlobalValue *CalleeGV,
4794 const TargetMachine &TM) {
4795 // It does not make sense to call callsShareTOCBase() with a caller that
4796 // is PC Relative since PC Relative callers do not have a TOC.
4797#ifndef NDEBUG
4798 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4799 assert(!STICaller->isUsingPCRelativeCalls() &&
4800 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4801#endif
4802
4803 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4804 // don't have enough information to determine if the caller and callee share
4805 // the same TOC base, so we have to pessimistically assume they don't for
4806 // correctness.
4807 if (!CalleeGV)
4808 return false;
4809
4810 // If the callee is preemptable, then the static linker will use a plt-stub
4811 // which saves the toc to the stack, and needs a nop after the call
4812 // instruction to convert to a toc-restore.
4813 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4814 return false;
4815
4816 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4817 // We may need a TOC restore in the situation where the caller requires a
4818 // valid TOC but the callee is PC Relative and does not.
4819 const Function *F = dyn_cast<Function>(CalleeGV);
4820 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4821
4822 // If we have an Alias we can try to get the function from there.
4823 if (Alias) {
4824 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4825 F = dyn_cast<Function>(GlobalObj);
4826 }
4827
4828 // If we still have no valid function pointer we do not have enough
4829 // information to determine if the callee uses PC Relative calls so we must
4830 // assume that it does.
4831 if (!F)
4832 return false;
4833
4834 // If the callee uses PC Relative we cannot guarantee that the callee won't
4835 // clobber the TOC of the caller and so we must assume that the two
4836 // functions do not share a TOC base.
4837 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4838 if (STICallee->isUsingPCRelativeCalls())
4839 return false;
4840
4841 // If the GV is not a strong definition then we need to assume it can be
4842 // replaced by another function at link time. The function that replaces
4843 // it may not share the same TOC as the caller since the callee may be
4844 // replaced by a PC Relative version of the same function.
4845 if (!CalleeGV->isStrongDefinitionForLinker())
4846 return false;
4847
4848 // The medium and large code models are expected to provide a sufficiently
4849 // large TOC to provide all data addressing needs of a module with a
4850 // single TOC.
4851 if (CodeModel::Medium == TM.getCodeModel() ||
4853 return true;
4854
4855 // Any explicitly-specified sections and section prefixes must also match.
4856 // Also, if we're using -ffunction-sections, then each function is always in
4857 // a different section (the same is true for COMDAT functions).
4858 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4859 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4860 return false;
4861 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4862 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4863 return false;
4864 }
4865
4866 return true;
4867}
4868
4869static bool
4871 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4872 assert(Subtarget.is64BitELFABI());
4873
4874 const unsigned PtrByteSize = 8;
4875 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4876
4877 static const MCPhysReg GPR[] = {
4878 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4879 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4880 };
4881 static const MCPhysReg VR[] = {
4882 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4883 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4884 };
4885
4886 const unsigned NumGPRs = std::size(GPR);
4887 const unsigned NumFPRs = 13;
4888 const unsigned NumVRs = std::size(VR);
4889 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4890
4891 unsigned NumBytes = LinkageSize;
4892 unsigned AvailableFPRs = NumFPRs;
4893 unsigned AvailableVRs = NumVRs;
4894
4895 for (const ISD::OutputArg& Param : Outs) {
4896 if (Param.Flags.isNest()) continue;
4897
4898 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4899 LinkageSize, ParamAreaSize, NumBytes,
4900 AvailableFPRs, AvailableVRs))
4901 return true;
4902 }
4903 return false;
4904}
4905
4906static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4907 if (CB.arg_size() != CallerFn->arg_size())
4908 return false;
4909
4910 auto CalleeArgIter = CB.arg_begin();
4911 auto CalleeArgEnd = CB.arg_end();
4912 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4913
4914 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4915 const Value* CalleeArg = *CalleeArgIter;
4916 const Value* CallerArg = &(*CallerArgIter);
4917 if (CalleeArg == CallerArg)
4918 continue;
4919
4920 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4921 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4922 // }
4923 // 1st argument of callee is undef and has the same type as caller.
4924 if (CalleeArg->getType() == CallerArg->getType() &&
4925 isa<UndefValue>(CalleeArg))
4926 continue;
4927
4928 return false;
4929 }
4930
4931 return true;
4932}
4933
4934// Returns true if TCO is possible between the callers and callees
4935// calling conventions.
4936static bool
4938 CallingConv::ID CalleeCC) {
4939 // Tail calls are possible with fastcc and ccc.
4940 auto isTailCallableCC = [] (CallingConv::ID CC){
4941 return CC == CallingConv::C || CC == CallingConv::Fast;
4942 };
4943 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4944 return false;
4945
4946 // We can safely tail call both fastcc and ccc callees from a c calling
4947 // convention caller. If the caller is fastcc, we may have less stack space
4948 // than a non-fastcc caller with the same signature so disable tail-calls in
4949 // that case.
4950 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4951}
4952
// Decides whether a call can be lowered as a tail call (TCO) or sibling call
// (SCO) under the 64-bit SVR4 (ELF) ABI.  Returns true only when every
// restriction below is satisfied.
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
    // NOTE(review): a parameter line was lost in extraction -- `Outs` is used
    // below, so it presumably declared
    // `const SmallVectorImpl<ISD::OutputArg> &Outs,` -- confirm upstream.
    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
    bool isCalleeExternalSymbol) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  // SCO can be disabled by flag; -tailcallopt overrides that for TCO.
  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  // Check that the calling conventions are compatible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
    return false;

  // Caller contains any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // Callee contains any byval parameter is not supported, too.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. For example, gcc is able to do SCO for caller1
  // in the following example, but not for caller2.
  //   struct test {
  //     long int a;
  //     char ary[56];
  //   } gTest;
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
  //     b->a = v.a;
  //     return 0;
  //   }
  //   void caller1(struct test a, struct test c, struct test *b) {
  //     callee(gTest, b); }
  //   void caller2(struct test *b) { callee(gTest, b); }
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // If callee and caller use different calling conventions, we cannot pass
  // parameters on stack since offsets for the parameter area may be different.
  if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
  // the caller and callee share the same TOC for TCO/SCO. If the caller and
  // callee potentially have different TOC bases then we cannot tail call since
  // we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  // We cannot guarantee this for indirect calls or calls to external functions.
  // When PC-Relative addressing is used, the concept of the TOC is no longer
  // applicable so this check is not required.
  // Check first for indirect calls.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
    return false;

  // Check if we share the TOC base.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If callee use the same argument list that caller is using, then we can
  // apply SCO on this case. If it is not, then we need to check if callee needs
  // stack for passing arguments.
  // PC Relative tail calls may not have a CallBase.
  // If there is no CallBase we cannot verify if we have the same argument
  // list so assume that we don't have the same argument list.
  if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;
  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  return true;
}
5036
5037/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5038/// for tail call optimization. Targets which want to do tail call
5039/// optimization should implement this function.
5040bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5041 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5042 CallingConv::ID CallerCC, bool isVarArg,
5043 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5044 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5045 return false;
5046
5047 // Variable argument functions are not supported.
5048 if (isVarArg)
5049 return false;
5050
5051 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5052 // Functions containing by val parameters are not supported.
5053 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5054 return false;
5055
5056 // Non-PIC/GOT tail calls are supported.
5057 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5058 return true;
5059
5060 // At the moment we can only do local tail calls (in same module, hidden
5061 // or protected) if we are generating PIC.
5062 if (CalleeGV)
5063 return CalleeGV->hasHiddenVisibility() ||
5064 CalleeGV->hasProtectedVisibility();
5065 }
5066
5067 return false;
5068}
5069
/// isCallCompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
// NOTE(review): extraction lost the declarator line (call sites below refer
// to this function as isBLACompatibleAddress(Op, DAG)) and the line that
// initializes `C` (presumably a dyn_cast of Op to ConstantSDNode) -- restore
// from upstream.
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  // The BLA target must be word-aligned and fit the sign-extended 26-bit
  // absolute-address field.
  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr; // Top 6 bits have to be sext of immediate.

  return DAG
      // NOTE(review): the getConstant(...) call line and its value-type
      // argument line were lost in extraction -- restore from upstream.
             (int)C->getZExtValue() >> 2, SDLoc(Op),
      .getNode();
}
5087
namespace {

/// Bookkeeping for one outgoing argument of a tail-call-optimized call: the
/// value to store plus the fixed stack slot it must be written to, kept both
/// as a frame-index SDValue and as the raw frame index.
struct TailCallArgumentInfo {
  SDValue Arg;        // The argument value to store.
  SDValue FrameIdxOp; // Frame-index node addressing the destination slot.
  int FrameIdx = 0;   // Raw frame index of the destination slot.

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
5099
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
// NOTE(review): extraction lost the declarator line naming this function
// (StoreTailCallArgumentsToStackSlot, per the call site in the tail-call
// preparation code below) -- restore from upstream.
                                      SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  // Emit one store per recorded argument; the resulting chains are collected
  // in MemOpChains for the caller to token-factor together.
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,
        // NOTE(review): the MachinePointerInfo argument line (presumably a
        // getFixedStack(..., FI) describing the slot) was lost in extraction
        // -- restore from upstream.
  }
}
5115
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
// NOTE(review): extraction lost the declarator line of this function and the
// definition of `MF` used below (presumably DAG.getMachineFunction()) --
// restore from upstream.
                                        SDValue OldRetAddr, SDValue OldFP,
                                        int SPDiff, const SDLoc &dl) {
  // Only needed when the stack is actually being adjusted for the tail call.
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
    // Return-address slot is pointer sized: 8 bytes on PPC64, 4 on PPC32.
    int SlotSize = Subtarget.isPPC64() ? 8 : 4;
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
                                                         NewRetAddrLoc, true);
    SDValue NewRetAddrFrIdx =
        DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));
  }
  return Chain;
}
5137
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
// NOTE(review): extraction lost the declarator line
// (static void CalculateTailCallArgDest(, per the call site in
// LowerMemOpCallTo below) -- restore from upstream.
    SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
    int SPDiff, unsigned ArgOffset,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Final slot position accounts for the stack-pointer delta of the tail call.
  int Offset = ArgOffset + SPDiff;
  // Round the argument's bit size up to whole bytes.
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
  EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  // Record the argument and its destination slot for the later store pass.
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}
5155
5156/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5157/// stack slot. Returns the chain as result and the loaded frame pointers in
5158/// LROpOut/FPOpout. Used when tail calling.
5159SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5160 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5161 SDValue &FPOpOut, const SDLoc &dl) const {
5162 if (SPDiff) {
5163 // Load the LR and FP stack slot for later adjusting.
5164 LROpOut = getReturnAddrFrameIndex(DAG);
5165 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5166 MachinePointerInfo());
5167 Chain = SDValue(LROpOut.getNode(), 1);
5168 }
5169 return Chain;
5170}
5171
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
// NOTE(review): extraction lost the declarator line (presumably
// `static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,` per
// the doc comment above) -- restore from upstream.
                                      SDValue Chain, ISD::ArgFlagsTy Flags,
                                      SelectionDAG &DAG, const SDLoc &dl) {
  // The byte count comes from the byval attribute on the parameter.
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
      /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
}
5186
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls.
// NOTE(review): extraction lost the declarator line naming this function
// (LowerMemOpCallTo per the doc comment above) and the definition of `PtrVT`
// used below -- restore from upstream.
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
  if (!isTailCall) {
    // Vector arguments are addressed relative to the stack pointer register
    // (X1/R1) rather than via the incoming PtrOff.
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, dl, PtrVT));
    }
    MemOpChains.push_back(
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
    // Calculate and remember argument location.
  } else
    CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                             TailCallArguments);
}
5212
// Finalizes the frame for a tail call: stores the recorded tail-call
// arguments to their slots, stores the return address, and closes the call
// sequence.
// NOTE(review): extraction lost the declarator line here (it presumably
// declared the function name and the DAG/InGlue/Chain parameters used in the
// body) -- restore from upstream.
static
    const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
    SDValue FPOp,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InGlue = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
  InGlue = Chain.getValue(1);
}
5235
5236// Is this global address that of a function that can be called by name? (as
5237// opposed to something that must hold a descriptor for an indirect call).
5238static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5239 if (GV) {
5240 if (GV->isThreadLocal())
5241 return false;
5242
5243 return GV->getValueType()->isFunctionTy();
5244 }
5245
5246 return false;
5247}
5248
// Copies call results out of their assigned physical registers into InVals,
// applying the extension/truncation implied by each location's LocInfo.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // NOTE(review): extraction lost the line declaring `RVLocs` (presumably a
  // SmallVector<CCValAssign, ...>) -- restore from upstream.
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               // NOTE(review): the true-arm line of this ternary (presumably
               // `? RetCC_PPC_Cold`) was lost in extraction -- restore from
               // upstream.
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val;

    // SPE f64 results come back split across two i32 registers and must be
    // reassembled (high/low order depends on endianness).
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Lo.getValue(1);
      InGlue = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Hi.getValue(1);
      InGlue = Hi.getValue(2);
      if (!Subtarget.isLittleEndian())
        std::swap (Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               VA.getLocReg(), VA.getLocVT(), InGlue);
      Chain = Val.getValue(1);
      InGlue = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
5312
// Returns true if the callee must be reached through an indirect call
// sequence (function pointer in CTR) rather than a direct branch.
static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
                           const PPCSubtarget &Subtarget, bool isPatchPoint) {
  auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
  const GlobalValue *GV = G ? G->getGlobal() : nullptr;

  // PatchPoint calls are not indirect.
  if (isPatchPoint)
    return false;

  // NOTE(review): the condition line guarding this early return was lost in
  // extraction; given GV is computed above, it presumably tests for a direct
  // function global address or external symbol -- restore from upstream.
    return false;

  // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
  // becuase the immediate function pointer points to a descriptor instead of
  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
  // pointer immediate points to the global entry point, while the BLA would
  // need to jump to the local entry point (see rL211174).
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
      isBLACompatibleAddress(Callee, DAG))
    return false;

  return true;
}
5336
5337// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5338static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5339 return Subtarget.isAIXABI() ||
5340 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5341}
5342
// Selects the PPCISD call opcode for this call: TC_RETURN for tail calls,
// BCTRL variants for indirect calls, CALL/CALL_NOP/CALL_NOTOC for direct
// calls, with _RM variants substituted for strict-FP calls.
// NOTE(review): extraction lost the declarator line here; per the call site
// in FinishCall this is getCallOpcode(CFlags, Caller, Callee, Subtarget, TM,
// IsStrictFPCall) returning unsigned -- restore from upstream.
                                  const Function &Caller, const SDValue &Callee,
                                  const PPCSubtarget &Subtarget,
                                  const TargetMachine &TM,
                                  bool IsStrictFPCall = false) {
  if (CFlags.IsTailCall)
    return PPCISD::TC_RETURN;

  unsigned RetOpc = 0;
  // This is a call through a function pointer.
  if (CFlags.IsIndirect) {
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
    // indirect calls. The save of the caller's TOC pointer to the stack will be
    // inserted into the DAG as part of call lowering. The restore of the TOC
    // pointer is modeled by using a pseudo instruction for the call opcode that
    // represents the 2 instruction sequence of an indirect branch and link,
    // immediately followed by a load of the TOC pointer from the stack save
    // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
    // as it is not saved or used.
    RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
                                                 : PPCISD::BCTRL;
  } else if (Subtarget.isUsingPCRelativeCalls()) {
    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
    RetOpc = PPCISD::CALL_NOTOC;
  } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
    // The ABIs that maintain a TOC pointer accross calls need to have a nop
    // immediately following the call instruction if the caller and callee may
    // have different TOC bases. At link time if the linker determines the calls
    // may not share a TOC base, the call is redirected to a trampoline inserted
    // by the linker. The trampoline will (among other things) save the callers
    // TOC pointer at an ABI designated offset in the linkage area and the
    // linker will rewrite the nop to be a load of the TOC pointer from the
    // linkage area into gpr2.
    auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
    RetOpc =
        callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
  } else
    RetOpc = PPCISD::CALL;
  // Strict FP calls use rounding-mode-aware (_RM) pseudo opcodes.
  if (IsStrictFPCall) {
    switch (RetOpc) {
    default:
      llvm_unreachable("Unknown call opcode");
    case PPCISD::BCTRL_LOAD_TOC:
      RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
      break;
    case PPCISD::BCTRL:
      RetOpc = PPCISD::BCTRL_RM;
      break;
    case PPCISD::CALL_NOTOC:
      RetOpc = PPCISD::CALL_NOTOC_RM;
      break;
    case PPCISD::CALL:
      RetOpc = PPCISD::CALL_RM;
      break;
    case PPCISD::CALL_NOP:
      RetOpc = PPCISD::CALL_NOP_RM;
      break;
    }
  }
  return RetOpc;
}
5405
// Rewrites the callee SDValue of a direct call into the target-specific node
// actually branched to: an absolute BLA address, an AIX entry-point symbol, a
// target global address, or a target external symbol (with PLT flag when
// needed).
// NOTE(review): several identifier-bearing lines of this function were lost
// in extraction; each gap is marked inline below -- restore from upstream.
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
      return SDValue(Dest, 0);

  // Returns true if the callee is local, and false otherwise.
  auto isLocalCallee = [&]() {
    // NOTE(review): the line initializing `G` (presumably a dyn_cast of
    // Callee to GlobalAddressSDNode) was lost in extraction.
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;

    return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
    // NOTE(review): the second operand line of this && was lost in
    // extraction -- restore from upstream.
  };

  // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  bool UsePlt =
      Subtarget.is32BitELFABI() && !isLocalCallee() &&
      // NOTE(review): the final operand line of this initializer was lost in
      // extraction -- restore from upstream.

  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    const TargetMachine &TM = Subtarget.getTargetMachine();
    // NOTE(review): the line defining `TLOF` (the object-file-lowering used
    // below) was lost in extraction.
    auto *S =
        static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));

    // NOTE(review): the line defining `PtrVT` was lost in extraction.
    return DAG.getMCSymbol(S, PtrVT);
  };

  auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
  const GlobalValue *GV = G ? G->getGlobal() : nullptr;
  if (isFunctionGlobalAddress(GV)) {
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();

    if (Subtarget.isAIXABI()) {
      return getAIXFuncEntryPointSymbolSDNode(GV);
    }
    return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
                                      UsePlt ? PPCII::MO_PLT : 0);
  }

  // NOTE(review): the `if` line binding `S` (presumably a dyn_cast of Callee
  // to ExternalSymbolSDNode) was lost in extraction.
    const char *SymName = S->getSymbol();
    if (Subtarget.isAIXABI()) {
      // If there exists a user-declared function whose name is the same as the
      // ExternalSymbol's, then we pick up the user-declared version.
      // NOTE(review): the line defining `Mod` (the module queried below) was
      // lost in extraction.
      if (const Function *F =
              dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
        return getAIXFuncEntryPointSymbolSDNode(F);

      // On AIX, direct function calls reference the symbol for the function's
      // entry point, which is named by prepending a "." before the function's
      // C-linkage name. A Qualname is returned here because an external
      // function entry point is a csect with XTY_ER property.
      const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
        auto &Context = DAG.getMachineFunction().getContext();
        MCSectionXCOFF *Sec = Context.getXCOFFSection(
            (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
            // NOTE(review): the XCOFF csect-properties argument line was lost
            // in extraction -- restore from upstream.
        return Sec->getQualNameSymbol();
      };

      SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
    }
    return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
                                       UsePlt ? PPCII::MO_PLT : 0);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}
5483
// Extracts the chain result of a CALLSEQ_START node.
// NOTE(review): extraction lost the declarator line; per the call site in
// prepareDescriptorIndirectCall this is
// getOutputChainFromCallSeq(SDValue CallSeqStart) returning SDValue --
// restore from upstream.
  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
         "Expected a CALLSEQ_STARTSDNode.");

  // The last operand is the chain, except when the node has glue. If the node
  // has glue, then the last operand is the glue, and the chain is the second
  // last operand.
  SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
  if (LastValue.getValueType() != MVT::Glue)
    return LastValue;

  return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
}
5497
5498// Creates the node that moves a functions address into the count register
5499// to prepare for an indirect call instruction.
5500static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5501 SDValue &Glue, SDValue &Chain,
5502 const SDLoc &dl) {
5503 SDValue MTCTROps[] = {Chain, Callee, Glue};
5504 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5505 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5506 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5507 // The glue is the second value produced.
5508 Glue = Chain.getValue(1);
5509}
5510
// Builds the indirect-call sequence for function-descriptor ABIs: loads the
// entry point, TOC anchor, and environment pointer from the descriptor,
// copies TOC (and env unless 'nest' is used) into their registers, then
// moves the entry point into CTR.
// NOTE(review): extraction lost the declarator line naming this function
// (prepareDescriptorIndirectCall, per the call site in FinishCall) --
// restore from upstream.
                                          SDValue &Glue, SDValue &Chain,
                                          SDValue CallSeqStart,
                                          const CallBase *CB, const SDLoc &dl,
                                          bool hasNest,
                                          const PPCSubtarget &Subtarget) {
  // Function pointers in the 64-bit SVR4 ABI do not point to the function
  // entry point, but to the function descriptor (the function entry point
  // address is part of the function descriptor though).
  // The function descriptor is a three doubleword structure with the
  // following fields: function entry point, TOC base address and
  // environment pointer.
  // Thus for a call through a function pointer, the following actions need
  // to be performed:
  //   1. Save the TOC of the caller in the TOC save area of its stack
  //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
  //   2. Load the address of the function entry point from the function
  //      descriptor.
  //   3. Load the TOC of the callee from the function descriptor into r2.
  //   4. Load the environment pointer from the function descriptor into
  //      r11.
  //   5. Branch to the function entry point address.
  //   6. On return of the callee, the TOC of the caller needs to be
  //      restored (this is done in FinishCall()).
  //
  // The loads are scheduled at the beginning of the call sequence, and the
  // register copies are flagged together to ensure that no other
  // operations can be scheduled in between. E.g. without flagging the
  // copies together, a TOC access in the caller could be scheduled between
  // the assignment of the callee TOC and the branch to the callee, which leads
  // to incorrect code.

  // Start by loading the function address from the descriptor.
  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
  // NOTE(review): the two ternary-arm lines selecting the MachineMemOperand
  // flags were lost in extraction -- restore from upstream.

  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

  // Registers used in building the DAG.
  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

  // Offsets of descriptor members.
  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

  const MVT RegVT = Subtarget.getScalarIntVT();
  const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);

  // One load for the functions entry point address.
  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
                                    Alignment, MMOFlags);

  // One for loading the TOC anchor for the module that contains the called
  // function.
  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
  SDValue TOCPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddTOC,
                  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);

  // One for loading the environment pointer.
  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
  SDValue LoadEnvPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddPtr,
                  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);


  // Then copy the newly loaded TOC anchor to the TOC pointer.
  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
  Chain = TOCVal.getValue(0);
  Glue = TOCVal.getValue(1);

  // If the function call has an explicit 'nest' parameter, it takes the
  // place of the environment pointer.
  assert((!hasNest || !Subtarget.isAIXABI()) &&
         "Nest parameter is not supported on AIX.");
  if (!hasNest) {
    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
    Chain = EnvVal.getValue(0);
    Glue = EnvVal.getValue(1);
  }

  // The rest of the indirect call sequence is the same as the non-descriptor
  // DAG.
  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
}
5602
// Assembles the operand list for the final call node: chain, callee (or
// TOC-restore / env-pointer / CTR operands for indirect calls), SP delta for
// tail calls, argument registers, TOC register, CR bit, register mask, and
// trailing glue.
// NOTE(review): extraction lost the declarator line here; per the call site
// in FinishCall this is buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass,
// Glue, Chain, Callee, SPDiff, Subtarget) -- restore from upstream.
static
                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
                  SelectionDAG &DAG,
                  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
                  const PPCSubtarget &Subtarget) {
  const bool IsPPC64 = Subtarget.isPPC64();
  // MVT for a general purpose register.
  const MVT RegVT = Subtarget.getScalarIntVT();

  // First operand is always the chain.
  Ops.push_back(Chain);

  // If it's a direct call pass the callee as the second operand.
  if (!CFlags.IsIndirect)
    Ops.push_back(Callee);
  else {
    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");

    // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
    // on the stack (this would have been done in `LowerCall_64SVR4` or
    // `LowerCall_AIX`). The call instruction is a pseudo instruction that
    // represents both the indirect branch and a load that restores the TOC
    // pointer from the linkage area. The operand for the TOC restore is an add
    // of the TOC save offset to the stack pointer. This must be the second
    // operand: after the chain input but before any other variadic arguments.
    // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
    // saved or used.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();

      SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
      Ops.push_back(AddTOC);
    }

    // Add the register used for the environment pointer.
    if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
      Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
                                    RegVT));


    // Add CTR register as callee so a bctr can be emitted later.
    if (CFlags.IsTailCall)
      Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
  }

  // If this is a tail call add stack pointer delta.
  if (CFlags.IsTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (const auto &[Reg, N] : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg, N.getValueType()));

  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
  // no way to mark dependencies as implicit here.
  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
      !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
    Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // If the glue is valid, it is the last operand.
  if (Glue.getNode())
    Ops.push_back(Glue);
}
5684
// Emits the final call (or tail-call) node after argument lowering: selects
// the opcode, rewrites/prepares the callee, builds the operand list, closes
// the call sequence, and copies out the results.
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {

  // TOC-maintaining ABIs must record that this function uses the TOC base.
  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
      Subtarget.isAIXABI())
    setUsesTOCBasePtr(DAG);

  unsigned CallOpc =
      getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
                    Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);

  if (!CFlags.IsIndirect)
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  else if (Subtarget.usesFunctionDescriptors())
    prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                  dl, CFlags.HasNest, Subtarget);
  else
    prepareIndirectCall(DAG, Callee, Glue, Chain, dl);

  // Build the operand list for the call instruction.
  // NOTE(review): the line declaring `Ops` (presumably a
  // SmallVector<SDValue, ...>) was lost in extraction -- restore from
  // upstream.
  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
                    SPDiff, Subtarget);

  // Emit tail call.
  if (CFlags.IsTailCall) {
    // Indirect tail call when using PC Relative calls do not have the same
    // constraints.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
    assert(CallOpc == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    // NOTE(review): one line was lost in extraction here -- restore from
    // upstream.
    SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
    DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
    return Ret;
  }

  std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
  Glue = Chain.getValue(1);

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
  // NOTE(review): the second operand line of this && (the condition
  // completing the ternary test) was lost in extraction -- restore from
  // upstream.
                             ? NumBytes
                             : 0;

  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
  Glue = Chain.getValue(1);

  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}
5754
// Checks whether the given call site can be tail-called, by recomputing the
// callee's return info and delegating to isEligibleForTCO.
// NOTE(review): extraction lost this function's signature line; the body
// takes a `const CallBase *CB` and returns bool -- restore from upstream.
  CallingConv::ID CalleeCC = CB->getCallingConv();
  const Function *CallerFunc = CB->getCaller();
  CallingConv::ID CallerCC = CallerFunc->getCallingConv();
  const Function *CalleeFunc = CB->getCalledFunction();
  // Indirect calls (no statically known callee) are rejected outright.
  if (!CalleeFunc)
    return false;
  const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);

  // NOTE(review): the lines declaring `Outs` and `Ins` (used below) were
  // lost in extraction -- restore from upstream.

  GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
                CalleeFunc->getAttributes(), Outs, *this,
                CalleeFunc->getDataLayout());

  return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
                          CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
                          false /*isCalleeExternalSymbol*/);
}
5775
// Decide whether a call site may be lowered as a tail call, dispatching to
// the ABI-specific eligibility check.
bool PPCTargetLowering::isEligibleForTCO(
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
    bool isCalleeExternalSymbol) const {
  // With long calls enabled, every call is made through a function pointer,
  // which rules out tail-call lowering unless the site is marked musttail.
  if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
    return false;

  // 64-bit SVR4 (ELF) has its own, more permissive check; everything else
  // goes through the generic eligibility test.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
    return IsEligibleForTailCallOptimization_64SVR4(
        CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
        isCalleeExternalSymbol);
  else
    return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
                                             isVarArg, Ins);
}
5793
// Top-level call-lowering entry point: decides tail-call eligibility, builds
// the CallFlags bundle, and dispatches to the ABI-specific LowerCall_*
// helper (AIX, 64-bit SVR4, or 32-bit SVR4).
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  // Unpack the pieces of CLI used below.
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;
  bool isPatchPoint = CLI.IsPatchPoint;
  const CallBase *CB = CLI.CB;

  if (isTailCall) {
    CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
    auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
    bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);

    // Downgrade to a normal call if the eligibility check fails; isTailCall
    // is a reference into CLI, so this updates the caller's view as well.
    isTailCall =
        isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
                         &(MF.getFunction()), IsCalleeExternalSymbol);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      // PC Relative calls no longer guarantee that the callee is a Global
      // Address Node. The callee could be an indirect tail call in which
      // case the SDValue for the callee could be a load (to load the address
      // of a function pointer) or it may be a register copy (to move the
      // address of the callee from a function parameter into a virtual
      // register). It may also be an ExternalSymbolSDNode (ex memcopy).
      assert((Subtarget.isUsingPCRelativeCalls() ||
              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");

      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
                        << "\nTCO callee: ");
      LLVM_DEBUG(Callee.dump());
    }
  }

  // A musttail call site requires a tail call; failing to form one is fatal.
  if (!isTailCall && CB && CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  CallFlags CFlags(
      CallConv, isTailCall, isVarArg, isPatchPoint,
      isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
      // hasNest
      Subtarget.is64BitELFABI() &&
          any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
      CLI.NoMerge);

  // Dispatch on ABI: AIX first, then 64-bit and 32-bit SVR4.
  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                         InVals, CB);

  assert(Subtarget.isSVR4ABI());
  if (Subtarget.isPPC64())
    return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);
  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                          InVals, CB);
}
5871
// Lower an outgoing call under the 32-bit SVR4 ABI: assign argument
// locations, copy by-value aggregates into the caller's frame, emit register
// copies and stack stores for the arguments, and hand off to FinishCall.
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  const CallingConv::ID CallConv = CFlags.CallConv;
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  // Stack slots are word (4-byte) aligned on 32-bit SVR4.
  const Align PtrAlign(4);

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrAlign);

  if (IsVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (!ArgFlags.isVarArg()) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               Outs[i].OrigTy, CCInfo);
      } else {
                                  ArgFlags, Outs[i].OrigTy, CCInfo);
      }

      // A true result here means the CC function failed to assign a location.
      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << ArgVT << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getStackSize();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i - Tracks the index into the list of registers allocated for the call
  // RealArgIdx - Tracks the index into the list of actual function arguments
  // j - Tracks the index into the list of byval arguments
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[RealArgIdx];
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
                                                     SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        dl, MVT::i32, Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      // With SPE, an f64 is split into two i32 halves (EXTRACT_SPE) and
      // passed in two consecutive register locations; note the extra ++i.
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
        bool IsLE = Subtarget.isLittleEndian();
        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                                   DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
                                            SVal.getValue(0)));
      } else
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!IsTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  // Merge all the argument stores into a single token so later nodes depend
  // on all of them.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (const auto &[Reg, N] : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (IsVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InGlue };

    // Only pass the glue operand if one exists yet.
    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
                        VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));

    InGlue = Chain.getValue(1);
  }

  if (IsTailCall)
    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
6104
6105// Copy an argument into memory, being careful to do this outside the
6106// call sequence for the call to which the argument belongs.
6107SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6108 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6109 SelectionDAG &DAG, const SDLoc &dl) const {
6110 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6111 CallSeqStart.getNode()->getOperand(0),
6112 Flags, DAG, dl);
6113 // The MEMCPY must go outside the CALLSEQ_START..END.
6114 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6115 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6116 SDLoc(MemcpyCall));
6117 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6118 NewCallSeqStart.getNode());
6119 return NewCallSeqStart;
6120}
6121
6122SDValue PPCTargetLowering::LowerCall_64SVR4(
6123 SDValue Chain, SDValue Callee, CallFlags CFlags,
6125 const SmallVectorImpl<SDValue> &OutVals,
6126 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6128 const CallBase *CB) const {
6129 bool isELFv2ABI = Subtarget.isELFv2ABI();
6130 bool isLittleEndian = Subtarget.isLittleEndian();
6131 unsigned NumOps = Outs.size();
6132 bool IsSibCall = false;
6133 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6134
6135 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6136 unsigned PtrByteSize = 8;
6137
6138 MachineFunction &MF = DAG.getMachineFunction();
6139
6140 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6141 IsSibCall = true;
6142
6143 // Mark this function as potentially containing a function that contains a
6144 // tail call. As a consequence the frame pointer will be used for dynamicalloc
6145 // and restoring the callers stack pointer in this functions epilog. This is
6146 // done because by tail calling the called function might overwrite the value
6147 // in this function's (MF) stack pointer stack slot 0(SP).
6148 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6149 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6150
6151 assert(!(IsFastCall && CFlags.IsVarArg) &&
6152 "fastcc not supported on varargs functions");
6153
6154 // Count how many bytes are to be pushed on the stack, including the linkage
6155 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6156 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6157 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6158 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6159 unsigned NumBytes = LinkageSize;
6160 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6161
6162 static const MCPhysReg GPR[] = {
6163 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6164 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6165 };
6166 static const MCPhysReg VR[] = {
6167 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6168 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6169 };
6170
6171 const unsigned NumGPRs = std::size(GPR);
6172 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6173 const unsigned NumVRs = std::size(VR);
6174
6175 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6176 // can be passed to the callee in registers.
6177 // For the fast calling convention, there is another check below.
6178 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6179 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6180 if (!HasParameterArea) {
6181 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6182 unsigned AvailableFPRs = NumFPRs;
6183 unsigned AvailableVRs = NumVRs;
6184 unsigned NumBytesTmp = NumBytes;
6185 for (unsigned i = 0; i != NumOps; ++i) {
6186 if (Outs[i].Flags.isNest()) continue;
6187 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6188 PtrByteSize, LinkageSize, ParamAreaSize,
6189 NumBytesTmp, AvailableFPRs, AvailableVRs))
6190 HasParameterArea = true;
6191 }
6192 }
6193
6194 // When using the fast calling convention, we don't provide backing for
6195 // arguments that will be in registers.
6196 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6197
6198 // Avoid allocating parameter area for fastcc functions if all the arguments
6199 // can be passed in the registers.
6200 if (IsFastCall)
6201 HasParameterArea = false;
6202
6203 // Add up all the space actually used.
6204 for (unsigned i = 0; i != NumOps; ++i) {
6205 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6206 EVT ArgVT = Outs[i].VT;
6207 EVT OrigVT = Outs[i].ArgVT;
6208
6209 if (Flags.isNest())
6210 continue;
6211
6212 if (IsFastCall) {
6213 if (Flags.isByVal()) {
6214 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6215 if (NumGPRsUsed > NumGPRs)
6216 HasParameterArea = true;
6217 } else {
6218 switch (ArgVT.getSimpleVT().SimpleTy) {
6219 default: llvm_unreachable("Unexpected ValueType for argument!");
6220 case MVT::i1:
6221 case MVT::i32:
6222 case MVT::i64:
6223 if (++NumGPRsUsed <= NumGPRs)
6224 continue;
6225 break;
6226 case MVT::v4i32:
6227 case MVT::v8i16:
6228 case MVT::v16i8:
6229 case MVT::v2f64:
6230 case MVT::v2i64:
6231 case MVT::v1i128:
6232 case MVT::f128:
6233 if (++NumVRsUsed <= NumVRs)
6234 continue;
6235 break;
6236 case MVT::v4f32:
6237 if (++NumVRsUsed <= NumVRs)
6238 continue;
6239 break;
6240 case MVT::f32:
6241 case MVT::f64:
6242 if (++NumFPRsUsed <= NumFPRs)
6243 continue;
6244 break;
6245 }
6246 HasParameterArea = true;
6247 }
6248 }
6249
6250 /* Respect alignment of argument on the stack. */
6251 auto Alignement =
6252 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6253 NumBytes = alignTo(NumBytes, Alignement);
6254
6255 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6256 if (Flags.isInConsecutiveRegsLast())
6257 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6258 }
6259
6260 unsigned NumBytesActuallyUsed = NumBytes;
6261
6262 // In the old ELFv1 ABI,
6263 // the prolog code of the callee may store up to 8 GPR argument registers to
6264 // the stack, allowing va_start to index over them in memory if its varargs.
6265 // Because we cannot tell if this is needed on the caller side, we have to
6266 // conservatively assume that it is needed. As such, make sure we have at
6267 // least enough stack space for the caller to store the 8 GPRs.
6268 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6269 // really requires memory operands, e.g. a vararg function.
6270 if (HasParameterArea)
6271 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6272 else
6273 NumBytes = LinkageSize;
6274
6275 // Tail call needs the stack to be aligned.
6276 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6277 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6278
6279 int SPDiff = 0;
6280
6281 // Calculate by how many bytes the stack has to be adjusted in case of tail
6282 // call optimization.
6283 if (!IsSibCall)
6284 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6285
6286 // To protect arguments on the stack from being clobbered in a tail call,
6287 // force all the loads to happen before doing any other lowering.
6288 if (CFlags.IsTailCall)
6289 Chain = DAG.getStackArgumentTokenFactor(Chain);
6290
6291 // Adjust the stack pointer for the new arguments...
6292 // These operations are automatically eliminated by the prolog/epilog pass
6293 if (!IsSibCall)
6294 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6295 SDValue CallSeqStart = Chain;
6296
6297 // Load the return address and frame pointer so it can be move somewhere else
6298 // later.
6299 SDValue LROp, FPOp;
6300 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6301
6302 // Set up a copy of the stack pointer for use loading and storing any
6303 // arguments that may not fit in the registers available for argument
6304 // passing.
6305 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6306
6307 // Figure out which arguments are going to go in registers, and which in
6308 // memory. Also, if this is a vararg function, floating point operations
6309 // must be stored to our stack, and loaded into integer regs as well, if
6310 // any integer regs are available for argument passing.
6311 unsigned ArgOffset = LinkageSize;
6312
6314 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6315
6316 SmallVector<SDValue, 8> MemOpChains;
6317 for (unsigned i = 0; i != NumOps; ++i) {
6318 SDValue Arg = OutVals[i];
6319 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6320 EVT ArgVT = Outs[i].VT;
6321 EVT OrigVT = Outs[i].ArgVT;
6322
6323 // PtrOff will be used to store the current argument to the stack if a
6324 // register cannot be found for it.
6325 SDValue PtrOff;
6326
6327 // We re-align the argument offset for each argument, except when using the
6328 // fast calling convention, when we need to make sure we do that only when
6329 // we'll actually use a stack slot.
6330 auto ComputePtrOff = [&]() {
6331 /* Respect alignment of argument on the stack. */
6332 auto Alignment =
6333 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6334 ArgOffset = alignTo(ArgOffset, Alignment);
6335
6336 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6337
6338 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6339 };
6340
6341 if (!IsFastCall) {
6342 ComputePtrOff();
6343
6344 /* Compute GPR index associated with argument offset. */
6345 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6346 GPR_idx = std::min(GPR_idx, NumGPRs);
6347 }
6348
6349 // Promote integers to 64-bit values.
6350 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6351 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6352 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6353 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6354 }
6355
6356 // FIXME memcpy is used way more than necessary. Correctness first.
6357 // Note: "by value" is code for passing a structure by value, not
6358 // basic types.
6359 if (Flags.isByVal()) {
6360 // Note: Size includes alignment padding, so
6361 // struct x { short a; char b; }
6362 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6363 // These are the proper values we need for right-justifying the
6364 // aggregate in a parameter register.
6365 unsigned Size = Flags.getByValSize();
6366
6367 // An empty aggregate parameter takes up no storage and no
6368 // registers.
6369 if (Size == 0)
6370 continue;
6371
6372 if (IsFastCall)
6373 ComputePtrOff();
6374
6375 // All aggregates smaller than 8 bytes must be passed right-justified.
6376 if (Size==1 || Size==2 || Size==4) {
6377 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6378 if (GPR_idx != NumGPRs) {
6379 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6380 MachinePointerInfo(), VT);
6381 MemOpChains.push_back(Load.getValue(1));
6382 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6383
6384 ArgOffset += PtrByteSize;
6385 continue;
6386 }
6387 }
6388
6389 if (GPR_idx == NumGPRs && Size < 8) {
6390 SDValue AddPtr = PtrOff;
6391 if (!isLittleEndian) {
6392 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6393 PtrOff.getValueType());
6394 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6395 }
6396 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6397 CallSeqStart,
6398 Flags, DAG, dl);
6399 ArgOffset += PtrByteSize;
6400 continue;
6401 }
6402 // Copy the object to parameter save area if it can not be entirely passed
6403 // by registers.
6404 // FIXME: we only need to copy the parts which need to be passed in
6405 // parameter save area. For the parts passed by registers, we don't need
6406 // to copy them to the stack although we need to allocate space for them
6407 // in parameter save area.
6408 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6409 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6410 CallSeqStart,
6411 Flags, DAG, dl);
6412
6413 // When a register is available, pass a small aggregate right-justified.
6414 if (Size < 8 && GPR_idx != NumGPRs) {
6415 // The easiest way to get this right-justified in a register
6416 // is to copy the structure into the rightmost portion of a
6417 // local variable slot, then load the whole slot into the
6418 // register.
6419 // FIXME: The memcpy seems to produce pretty awful code for
6420 // small aggregates, particularly for packed ones.
6421 // FIXME: It would be preferable to use the slot in the
6422 // parameter save area instead of a new local variable.
6423 SDValue AddPtr = PtrOff;
6424 if (!isLittleEndian) {
6425 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6426 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6427 }
6428 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6429 CallSeqStart,
6430 Flags, DAG, dl);
6431
6432 // Load the slot into the register.
6433 SDValue Load =
6434 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6435 MemOpChains.push_back(Load.getValue(1));
6436 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6437
6438 // Done with this argument.
6439 ArgOffset += PtrByteSize;
6440 continue;
6441 }
6442
6443 // For aggregates larger than PtrByteSize, copy the pieces of the
6444 // object that fit into registers from the parameter save area.
6445 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6446 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6447 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6448 if (GPR_idx != NumGPRs) {
6449 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6450 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6451 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6452 MachinePointerInfo(), ObjType);
6453
6454 MemOpChains.push_back(Load.getValue(1));
6455 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6456 ArgOffset += PtrByteSize;
6457 } else {
6458 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6459 break;
6460 }
6461 }
6462 continue;
6463 }
6464
6465 switch (Arg.getSimpleValueType().SimpleTy) {
6466 default: llvm_unreachable("Unexpected ValueType for argument!");
6467 case MVT::i1:
6468 case MVT::i32:
6469 case MVT::i64:
6470 if (Flags.isNest()) {
6471 // The 'nest' parameter, if any, is passed in R11.
6472 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6473 break;
6474 }
6475
6476 // These can be scalar arguments or elements of an integer array type
6477 // passed directly. Clang may use those instead of "byval" aggregate
6478 // types to avoid forcing arguments to memory unnecessarily.
6479 if (GPR_idx != NumGPRs) {
6480 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6481 } else {
6482 if (IsFastCall)
6483 ComputePtrOff();
6484
6485 assert(HasParameterArea &&
6486 "Parameter area must exist to pass an argument in memory.");
6487 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6488 true, CFlags.IsTailCall, false, MemOpChains,
6489 TailCallArguments, dl);
6490 if (IsFastCall)
6491 ArgOffset += PtrByteSize;
6492 }
6493 if (!IsFastCall)
6494 ArgOffset += PtrByteSize;
6495 break;
6496 case MVT::f32:
6497 case MVT::f64: {
6498 // These can be scalar arguments or elements of a float array type
6499 // passed directly. The latter are used to implement ELFv2 homogenous
6500 // float aggregates.
6501
6502 // Named arguments go into FPRs first, and once they overflow, the
6503 // remaining arguments go into GPRs and then the parameter save area.
6504 // Unnamed arguments for vararg functions always go to GPRs and
6505 // then the parameter save area. For now, put all arguments to vararg
6506 // routines always in both locations (FPR *and* GPR or stack slot).
6507 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6508 bool NeededLoad = false;
6509
6510 // First load the argument into the next available FPR.
6511 if (FPR_idx != NumFPRs)
6512 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6513
6514 // Next, load the argument into GPR or stack slot if needed.
6515 if (!NeedGPROrStack)
6516 ;
6517 else if (GPR_idx != NumGPRs && !IsFastCall) {
6518 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6519 // once we support fp <-> gpr moves.
6520
6521 // In the non-vararg case, this can only ever happen in the
6522 // presence of f32 array types, since otherwise we never run
6523 // out of FPRs before running out of GPRs.
6524 SDValue ArgVal;
6525
6526 // Double values are always passed in a single GPR.
6527 if (Arg.getValueType() != MVT::f32) {
6528 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6529
6530 // Non-array float values are extended and passed in a GPR.
6531 } else if (!Flags.isInConsecutiveRegs()) {
6532 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6533 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6534
6535 // If we have an array of floats, we collect every odd element
6536 // together with its predecessor into one GPR.
6537 } else if (ArgOffset % PtrByteSize != 0) {
6538 SDValue Lo, Hi;
6539 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6540 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6541 if (!isLittleEndian)
6542 std::swap(Lo, Hi);
6543 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6544
6545 // The final element, if even, goes into the first half of a GPR.
6546 } else if (Flags.isInConsecutiveRegsLast()) {
6547 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6548 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6549 if (!isLittleEndian)
6550 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6551 DAG.getConstant(32, dl, MVT::i32));
6552
6553 // Non-final even elements are skipped; they will be handled
6554 // together the with subsequent argument on the next go-around.
6555 } else
6556 ArgVal = SDValue();
6557
6558 if (ArgVal.getNode())
6559 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6560 } else {
6561 if (IsFastCall)
6562 ComputePtrOff();
6563
6564 // Single-precision floating-point values are mapped to the
6565 // second (rightmost) word of the stack doubleword.
6566 if (Arg.getValueType() == MVT::f32 &&
6567 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6568 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6569 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6570 }
6571
6572 assert(HasParameterArea &&
6573 "Parameter area must exist to pass an argument in memory.");
6574 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6575 true, CFlags.IsTailCall, false, MemOpChains,
6576 TailCallArguments, dl);
6577
6578 NeededLoad = true;
6579 }
6580 // When passing an array of floats, the array occupies consecutive
6581 // space in the argument area; only round up to the next doubleword
6582 // at the end of the array. Otherwise, each float takes 8 bytes.
6583 if (!IsFastCall || NeededLoad) {
6584 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6585 Flags.isInConsecutiveRegs()) ? 4 : 8;
6586 if (Flags.isInConsecutiveRegsLast())
6587 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6588 }
6589 break;
6590 }
6591 case MVT::v4f32:
6592 case MVT::v4i32:
6593 case MVT::v8i16:
6594 case MVT::v16i8:
6595 case MVT::v2f64:
6596 case MVT::v2i64:
6597 case MVT::v1i128:
6598 case MVT::f128:
6599 // These can be scalar arguments or elements of a vector array type
6600 // passed directly. The latter are used to implement ELFv2 homogenous
6601 // vector aggregates.
6602
6603 // For a varargs call, named arguments go into VRs or on the stack as
6604 // usual; unnamed arguments always go to the stack or the corresponding
6605 // GPRs when within range. For now, we always put the value in both
6606 // locations (or even all three).
6607 if (CFlags.IsVarArg) {
6608 assert(HasParameterArea &&
6609 "Parameter area must exist if we have a varargs call.");
6610 // We could elide this store in the case where the object fits
6611 // entirely in R registers. Maybe later.
6612 SDValue Store =
6613 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6614 MemOpChains.push_back(Store);
6615 if (VR_idx != NumVRs) {
6616 SDValue Load =
6617 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6618 MemOpChains.push_back(Load.getValue(1));
6619 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6620 }
6621 ArgOffset += 16;
6622 for (unsigned i=0; i<16; i+=PtrByteSize) {
6623 if (GPR_idx == NumGPRs)
6624 break;
6625 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6626 DAG.getConstant(i, dl, PtrVT));
6627 SDValue Load =
6628 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6629 MemOpChains.push_back(Load.getValue(1));
6630 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6631 }
6632 break;
6633 }
6634
6635 // Non-varargs Altivec params go into VRs or on the stack.
6636 if (VR_idx != NumVRs) {
6637 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6638 } else {
6639 if (IsFastCall)
6640 ComputePtrOff();
6641
6642 assert(HasParameterArea &&
6643 "Parameter area must exist to pass an argument in memory.");
6644 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6645 true, CFlags.IsTailCall, true, MemOpChains,
6646 TailCallArguments, dl);
6647 if (IsFastCall)
6648 ArgOffset += 16;
6649 }
6650
6651 if (!IsFastCall)
6652 ArgOffset += 16;
6653 break;
6654 }
6655 }
6656
6657 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6658 "mismatch in size of parameter area");
6659 (void)NumBytesActuallyUsed;
6660
6661 if (!MemOpChains.empty())
6662 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6663
6664 // Check if this is an indirect call (MTCTR/BCTRL).
6665 // See prepareDescriptorIndirectCall and buildCallOperands for more
6666 // information about calls through function pointers in the 64-bit SVR4 ABI.
6667 if (CFlags.IsIndirect) {
6668 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6669 // caller in the TOC save area.
6670 if (isTOCSaveRestoreRequired(Subtarget)) {
6671 assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6672 // Load r2 into a virtual register and store it to the TOC save area.
6673 setUsesTOCBasePtr(DAG);
6674 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6675 // TOC save area offset.
6676 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6677 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6678 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6679 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6681 DAG.getMachineFunction(), TOCSaveOffset));
6682 }
6683 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6684 // This does not mean the MTCTR instruction must use R12; it's easier
6685 // to model this as an extra parameter, so do that.
6686 if (isELFv2ABI && !CFlags.IsPatchPoint)
6687 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6688 }
6689
6690 // Build a sequence of copy-to-reg nodes chained together with token chain
6691 // and flag operands which copy the outgoing args into the appropriate regs.
6692 SDValue InGlue;
6693 for (const auto &[Reg, N] : RegsToPass) {
6694 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6695 InGlue = Chain.getValue(1);
6696 }
6697
6698 if (CFlags.IsTailCall && !IsSibCall)
6699 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6700 TailCallArguments);
6701
6702 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6703 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6704}
6705
6706// Returns true when the shadow of a general purpose argument register
6707// in the parameter save area is aligned to at least 'RequiredAlign'.
6708static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6709 assert(RequiredAlign.value() <= 16 &&
6710 "Required alignment greater than stack alignment.");
6711 switch (Reg) {
6712 default:
6713 report_fatal_error("called on invalid register.");
6714 case PPC::R5:
6715 case PPC::R9:
6716 case PPC::X3:
6717 case PPC::X5:
6718 case PPC::X7:
6719 case PPC::X9:
6720 // These registers are 16 byte aligned which is the most strict aligment
6721 // we can support.
6722 return true;
6723 case PPC::R3:
6724 case PPC::R7:
6725 case PPC::X4:
6726 case PPC::X6:
6727 case PPC::X8:
6728 case PPC::X10:
6729 // The shadow of these registers in the PSA is 8 byte aligned.
6730 return RequiredAlign <= 8;
6731 case PPC::R4:
6732 case PPC::R6:
6733 case PPC::R8:
6734 case PPC::R10:
6735 return RequiredAlign <= 4;
6736 }
6737}
6738
6739static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6740 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6741 Type *OrigTy, CCState &State) {
6742 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6743 State.getMachineFunction().getSubtarget());
6744 const bool IsPPC64 = Subtarget.isPPC64();
6745 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6746 const Align PtrAlign(PtrSize);
6747 const Align StackAlign(16);
6748 const MVT RegVT = Subtarget.getScalarIntVT();
6749
6750 if (ValVT == MVT::f128)
6751 report_fatal_error("f128 is unimplemented on AIX.");
6752
6753 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6754 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6755 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6756 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6757 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6758 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6759
6760 static const MCPhysReg VR[] = {// Vector registers.
6761 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6762 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6763 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6764
6765 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6766
6767 if (ArgFlags.isNest()) {
6768 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6769 if (!EnvReg)
6770 report_fatal_error("More then one nest argument.");
6771 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6772 return false;
6773 }
6774
6775 if (ArgFlags.isByVal()) {
6776 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6777 if (ByValAlign > StackAlign)
6778 report_fatal_error("Pass-by-value arguments with alignment greater than "
6779 "16 are not supported.");
6780
6781 const unsigned ByValSize = ArgFlags.getByValSize();
6782 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6783
6784 // An empty aggregate parameter takes up no storage and no registers,
6785 // but needs a MemLoc for a stack slot for the formal arguments side.
6786 if (ByValSize == 0) {
6788 State.getStackSize(), RegVT, LocInfo));
6789 return false;
6790 }
6791
6792 // Shadow allocate any registers that are not properly aligned.
6793 unsigned NextReg = State.getFirstUnallocated(GPRs);
6794 while (NextReg != GPRs.size() &&
6795 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6796 // Shadow allocate next registers since its aligment is not strict enough.
6797 MCRegister Reg = State.AllocateReg(GPRs);
6798 // Allocate the stack space shadowed by said register.
6799 State.AllocateStack(PtrSize, PtrAlign);
6800 assert(Reg && "Alocating register unexpectedly failed.");
6801 (void)Reg;
6802 NextReg = State.getFirstUnallocated(GPRs);
6803 }
6804
6805 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6806 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6807 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6808 if (MCRegister Reg = State.AllocateReg(GPRs))
6809 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6810 else {
6813 LocInfo));
6814 break;
6815 }
6816 }
6817 return false;
6818 }
6819
6820 // Arguments always reserve parameter save area.
6821 switch (ValVT.SimpleTy) {
6822 default:
6823 report_fatal_error("Unhandled value type for argument.");
6824 case MVT::i64:
6825 // i64 arguments should have been split to i32 for PPC32.
6826 assert(IsPPC64 && "PPC32 should have split i64 values.");
6827 [[fallthrough]];
6828 case MVT::i1:
6829 case MVT::i32: {
6830 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6831 // AIX integer arguments are always passed in register width.
6832 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6833 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6835 if (MCRegister Reg = State.AllocateReg(GPRs))
6836 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6837 else
6838 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6839
6840 return false;
6841 }
6842 case MVT::f32:
6843 case MVT::f64: {
6844 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6845 const unsigned StoreSize = LocVT.getStoreSize();
6846 // Floats are always 4-byte aligned in the PSA on AIX.
6847 // This includes f64 in 64-bit mode for ABI compatibility.
6848 const unsigned Offset =
6849 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6850 MCRegister FReg = State.AllocateReg(FPR);
6851 if (FReg)
6852 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6853
6854 // Reserve and initialize GPRs or initialize the PSA as required.
6855 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6856 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6857 assert(FReg && "An FPR should be available when a GPR is reserved.");
6858 if (State.isVarArg()) {
6859 // Successfully reserved GPRs are only initialized for vararg calls.
6860 // Custom handling is required for:
6861 // f64 in PPC32 needs to be split into 2 GPRs.
6862 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6863 State.addLoc(
6864 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6865 }
6866 } else {
6867 // If there are insufficient GPRs, the PSA needs to be initialized.
6868 // Initialization occurs even if an FPR was initialized for
6869 // compatibility with the AIX XL compiler. The full memory for the
6870 // argument will be initialized even if a prior word is saved in GPR.
6871 // A custom memLoc is used when the argument also passes in FPR so
6872 // that the callee handling can skip over it easily.
6873 State.addLoc(
6874 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6875 LocInfo)
6876 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6877 break;
6878 }
6879 }
6880
6881 return false;
6882 }
6883 case MVT::v4f32:
6884 case MVT::v4i32:
6885 case MVT::v8i16:
6886 case MVT::v16i8:
6887 case MVT::v2i64:
6888 case MVT::v2f64:
6889 case MVT::v1i128: {
6890 const unsigned VecSize = 16;
6891 const Align VecAlign(VecSize);
6892
6893 if (!State.isVarArg()) {
6894 // If there are vector registers remaining we don't consume any stack
6895 // space.
6896 if (MCRegister VReg = State.AllocateReg(VR)) {
6897 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6898 return false;
6899 }
6900 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6901 // might be allocated in the portion of the PSA that is shadowed by the
6902 // GPRs.
6903 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6904 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6905 return false;
6906 }
6907
6908 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6909 // Burn any underaligned registers and their shadowed stack space until
6910 // we reach the required alignment.
6911 while (NextRegIndex != GPRs.size() &&
6912 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6913 // Shadow allocate register and its stack shadow.
6914 MCRegister Reg = State.AllocateReg(GPRs);
6915 State.AllocateStack(PtrSize, PtrAlign);
6916 assert(Reg && "Allocating register unexpectedly failed.");
6917 (void)Reg;
6918 NextRegIndex = State.getFirstUnallocated(GPRs);
6919 }
6920
6921 // Vectors that are passed as fixed arguments are handled differently.
6922 // They are passed in VRs if any are available (unlike arguments passed
6923 // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6924 // functions)
6925 if (!ArgFlags.isVarArg()) {
6926 if (MCRegister VReg = State.AllocateReg(VR)) {
6927 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6928 // Shadow allocate GPRs and stack space even though we pass in a VR.
6929 for (unsigned I = 0; I != VecSize; I += PtrSize)
6930 State.AllocateReg(GPRs);
6931 State.AllocateStack(VecSize, VecAlign);
6932 return false;
6933 }
6934 // No vector registers remain so pass on the stack.
6935 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6936 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6937 return false;
6938 }
6939
6940 // If all GPRS are consumed then we pass the argument fully on the stack.
6941 if (NextRegIndex == GPRs.size()) {
6942 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6943 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6944 return false;
6945 }
6946
6947 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6948 // half of the argument, and then need to pass the remaining half on the
6949 // stack.
6950 if (GPRs[NextRegIndex] == PPC::R9) {
6951 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6952 State.addLoc(
6953 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6954
6955 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
6956 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
6957 assert(FirstReg && SecondReg &&
6958 "Allocating R9 or R10 unexpectedly failed.");
6959 State.addLoc(
6960 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6961 State.addLoc(
6962 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6963 return false;
6964 }
6965
6966 // We have enough GPRs to fully pass the vector argument, and we have
6967 // already consumed any underaligned registers. Start with the custom
6968 // MemLoc and then the custom RegLocs.
6969 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6970 State.addLoc(
6971 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6972 for (unsigned I = 0; I != VecSize; I += PtrSize) {
6973 const MCRegister Reg = State.AllocateReg(GPRs);
6974 assert(Reg && "Failed to allocated register for vararg vector argument");
6975 State.addLoc(
6976 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6977 }
6978 return false;
6979 }
6980 }
6981 return true;
6982}
6983
6984// So far, this function is only used by LowerFormalArguments_AIX()
6986 bool IsPPC64,
6987 bool HasP8Vector,
6988 bool HasVSX) {
6989 assert((IsPPC64 || SVT != MVT::i64) &&
6990 "i64 should have been split for 32-bit codegen.");
6991
6992 switch (SVT) {
6993 default:
6994 report_fatal_error("Unexpected value type for formal argument");
6995 case MVT::i1:
6996 case MVT::i32:
6997 case MVT::i64:
6998 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6999 case MVT::f32:
7000 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7001 case MVT::f64:
7002 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7003 case MVT::v4f32:
7004 case MVT::v4i32:
7005 case MVT::v8i16:
7006 case MVT::v16i8:
7007 case MVT::v2i64:
7008 case MVT::v2f64:
7009 case MVT::v1i128:
7010 return &PPC::VRRCRegClass;
7011 }
7012}
7013
7015 SelectionDAG &DAG, SDValue ArgValue,
7016 MVT LocVT, const SDLoc &dl) {
7017 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7018 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7019
7020 if (Flags.isSExt())
7021 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7022 DAG.getValueType(ValVT));
7023 else if (Flags.isZExt())
7024 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7025 DAG.getValueType(ValVT));
7026
7027 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7028}
7029
7030static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7031 const unsigned LASize = FL->getLinkageSize();
7032
7033 if (PPC::GPRCRegClass.contains(Reg)) {
7034 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7035 "Reg must be a valid argument register!");
7036 return LASize + 4 * (Reg - PPC::R3);
7037 }
7038
7039 if (PPC::G8RCRegClass.contains(Reg)) {
7040 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7041 "Reg must be a valid argument register!");
7042 return LASize + 8 * (Reg - PPC::X3);
7043 }
7044
7045 llvm_unreachable("Only general purpose registers expected.");
7046}
7047
7048// AIX ABI Stack Frame Layout:
7049//
7050// Low Memory +--------------------------------------------+
7051// SP +---> | Back chain | ---+
7052// | +--------------------------------------------+ |
7053// | | Saved Condition Register | |
7054// | +--------------------------------------------+ |
7055// | | Saved Linkage Register | |
7056// | +--------------------------------------------+ | Linkage Area
7057// | | Reserved for compilers | |
7058// | +--------------------------------------------+ |
7059// | | Reserved for binders | |
7060// | +--------------------------------------------+ |
7061// | | Saved TOC pointer | ---+
7062// | +--------------------------------------------+
7063// | | Parameter save area |
7064// | +--------------------------------------------+
7065// | | Alloca space |
7066// | +--------------------------------------------+
7067// | | Local variable space |
7068// | +--------------------------------------------+
7069// | | Float/int conversion temporary |
7070// | +--------------------------------------------+
7071// | | Save area for AltiVec registers |
7072// | +--------------------------------------------+
7073// | | AltiVec alignment padding |
7074// | +--------------------------------------------+
7075// | | Save area for VRSAVE register |
7076// | +--------------------------------------------+
7077// | | Save area for General Purpose registers |
7078// | +--------------------------------------------+
7079// | | Save area for Floating Point registers |
7080// | +--------------------------------------------+
7081// +---- | Back chain |
7082// High Memory +--------------------------------------------+
7083//
7084// Specifications:
7085// AIX 7.2 Assembler Language Reference
7086// Subroutine linkage convention
7087
7088SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7089 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7090 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7091 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7092
7093 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7094 CallConv == CallingConv::Fast) &&
7095 "Unexpected calling convention!");
7096
7097 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7098 report_fatal_error("Tail call support is unimplemented on AIX.");
7099
7100 if (useSoftFloat())
7101 report_fatal_error("Soft float support is unimplemented on AIX.");
7102
7103 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7104
7105 const bool IsPPC64 = Subtarget.isPPC64();
7106 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7107
7108 // Assign locations to all of the incoming arguments.
7110 MachineFunction &MF = DAG.getMachineFunction();
7111 MachineFrameInfo &MFI = MF.getFrameInfo();
7112 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7113 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7114
7115 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7116 // Reserve space for the linkage area on the stack.
7117 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7118 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7119 uint64_t SaveStackPos = CCInfo.getStackSize();
7120 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7121 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7122
7124
7125 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7126 CCValAssign &VA = ArgLocs[I++];
7127 MVT LocVT = VA.getLocVT();
7128 MVT ValVT = VA.getValVT();
7129 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7130
7131 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7132 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7133 // For compatibility with the AIX XL compiler, the float args in the
7134 // parameter save area are initialized even if the argument is available
7135 // in register. The caller is required to initialize both the register
7136 // and memory, however, the callee can choose to expect it in either.
7137 // The memloc is dismissed here because the argument is retrieved from
7138 // the register.
7139 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7140 continue;
7141
7142 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7143 const TargetRegisterClass *RegClass = getRegClassForSVT(
7144 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7145 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7146 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7147 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7148 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7149 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7150 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7151 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7152 MachinePointerInfo(), Align(PtrByteSize));
7153 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7154 MemOps.push_back(StoreReg);
7155 }
7156
7157 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7158 unsigned StoreSize =
7159 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7160 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7161 }
7162
7163 auto HandleMemLoc = [&]() {
7164 const unsigned LocSize = LocVT.getStoreSize();
7165 const unsigned ValSize = ValVT.getStoreSize();
7166 assert((ValSize <= LocSize) &&
7167 "Object size is larger than size of MemLoc");
7168 int CurArgOffset = VA.getLocMemOffset();
7169 // Objects are right-justified because AIX is big-endian.
7170 if (LocSize > ValSize)
7171 CurArgOffset += LocSize - ValSize;
7172 // Potential tail calls could cause overwriting of argument stack slots.
7173 const bool IsImmutable =
7175 (CallConv == CallingConv::Fast));
7176 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7177 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7178 SDValue ArgValue =
7179 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7180
7181 // While the ABI specifies the argument type is (sign or zero) extended
7182 // out to register width, not all code is compliant. We truncate and
7183 // re-extend to be more forgiving of these callers when the argument type
7184 // is smaller than register width.
7185 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7186 ValVT.isInteger() &&
7187 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7188 // It is possible to have either real integer values
7189 // or integers that were not originally integers.
7190 // In the latter case, these could have came from structs,
7191 // and these integers would not have an extend on the parameter.
7192 // Since these types of integers do not have an extend specified
7193 // in the first place, the type of extend that we do should not matter.
7194 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7195 ? MVT::i8
7196 : ArgVT;
7197 SDValue ArgValueTrunc =
7198 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7199 SDValue ArgValueExt =
7200 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7201 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7202 InVals.push_back(ArgValueExt);
7203 } else {
7204 InVals.push_back(ArgValue);
7205 }
7206 };
7207
7208 // Vector arguments to VaArg functions are passed both on the stack, and
7209 // in any available GPRs. Load the value from the stack and add the GPRs
7210 // as live ins.
7211 if (VA.isMemLoc() && VA.needsCustom()) {
7212 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7213 assert(isVarArg && "Only use custom memloc for vararg.");
7214 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7215 // matching custom RegLocs.
7216 const unsigned OriginalValNo = VA.getValNo();
7217 (void)OriginalValNo;
7218
7219 auto HandleCustomVecRegLoc = [&]() {
7220 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7221 "Missing custom RegLoc.");
7222 VA = ArgLocs[I++];
7223 assert(VA.getValVT().isVector() &&
7224 "Unexpected Val type for custom RegLoc.");
7225 assert(VA.getValNo() == OriginalValNo &&
7226 "ValNo mismatch between custom MemLoc and RegLoc.");
7228 MF.addLiveIn(VA.getLocReg(),
7229 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7230 Subtarget.hasVSX()));
7231 };
7232
7233 HandleMemLoc();
7234 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7235 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7236 // R10.
7237 HandleCustomVecRegLoc();
7238 HandleCustomVecRegLoc();
7239
7240 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7241 // we passed the vector in R5, R6, R7 and R8.
7242 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7243 assert(!IsPPC64 &&
7244 "Only 2 custom RegLocs expected for 64-bit codegen.");
7245 HandleCustomVecRegLoc();
7246 HandleCustomVecRegLoc();
7247 }
7248
7249 continue;
7250 }
7251
7252 if (VA.isRegLoc()) {
7253 if (VA.getValVT().isScalarInteger())
7255 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7256 switch (VA.getValVT().SimpleTy) {
7257 default:
7258 report_fatal_error("Unhandled value type for argument.");
7259 case MVT::f32:
7261 break;
7262 case MVT::f64:
7264 break;
7265 }
7266 } else if (VA.getValVT().isVector()) {
7267 switch (VA.getValVT().SimpleTy) {
7268 default:
7269 report_fatal_error("Unhandled value type for argument.");
7270 case MVT::v16i8:
7272 break;
7273 case MVT::v8i16:
7275 break;
7276 case MVT::v4i32:
7277 case MVT::v2i64:
7278 case MVT::v1i128:
7280 break;
7281 case MVT::v4f32:
7282 case MVT::v2f64:
7284 break;
7285 }
7286 }
7287 }
7288
7289 if (Flags.isByVal() && VA.isMemLoc()) {
7290 const unsigned Size =
7291 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7292 PtrByteSize);
7293 const int FI = MF.getFrameInfo().CreateFixedObject(
7294 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7295 /* IsAliased */ true);
7296 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7297 InVals.push_back(FIN);
7298
7299 continue;
7300 }
7301
7302 if (Flags.isByVal()) {
7303 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7304
7305 const MCPhysReg ArgReg = VA.getLocReg();
7306 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7307
7308 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7309 const int FI = MF.getFrameInfo().CreateFixedObject(
7310 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7311 /* IsAliased */ true);
7312 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7313 InVals.push_back(FIN);
7314
7315 // Add live ins for all the RegLocs for the same ByVal.
7316 const TargetRegisterClass *RegClass =
7317 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7318
7319 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7320 unsigned Offset) {
7321 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7322 // Since the callers side has left justified the aggregate in the
7323 // register, we can simply store the entire register into the stack
7324 // slot.
7325 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7326 // The store to the fixedstack object is needed becuase accessing a
7327 // field of the ByVal will use a gep and load. Ideally we will optimize
7328 // to extracting the value from the register directly, and elide the
7329 // stores when the arguments address is not taken, but that will need to
7330 // be future work.
7331 SDValue Store = DAG.getStore(
7332 CopyFrom.getValue(1), dl, CopyFrom,
7335
7336 MemOps.push_back(Store);
7337 };
7338
7339 unsigned Offset = 0;
7340 HandleRegLoc(VA.getLocReg(), Offset);
7341 Offset += PtrByteSize;
7342 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7343 Offset += PtrByteSize) {
7344 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7345 "RegLocs should be for ByVal argument.");
7346
7347 const CCValAssign RL = ArgLocs[I++];
7348 HandleRegLoc(RL.getLocReg(), Offset);
7350 }
7351
7352 if (Offset != StackSize) {
7353 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7354 "Expected MemLoc for remaining bytes.");
7355 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7356 // Consume the MemLoc.The InVal has already been emitted, so nothing
7357 // more needs to be done.
7358 ++I;
7359 }
7360
7361 continue;
7362 }
7363
7364 if (VA.isRegLoc() && !VA.needsCustom()) {
7365 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7366 Register VReg =
7367 MF.addLiveIn(VA.getLocReg(),
7368 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7369 Subtarget.hasVSX()));
7370 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7371 if (ValVT.isScalarInteger() &&
7372 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7373 ArgValue =
7374 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7375 }
7376 InVals.push_back(ArgValue);
7377 continue;
7378 }
7379 if (VA.isMemLoc()) {
7380 HandleMemLoc();
7381 continue;
7382 }
7383 }
7384
7385 // On AIX a minimum of 8 words is saved to the parameter save area.
7386 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7387 // Area that is at least reserved in the caller of this function.
7388 unsigned CallerReservedArea = std::max<unsigned>(
7389 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7390
7391 // Set the size that is at least reserved in caller of this function. Tail
7392 // call optimized function's reserved stack space needs to be aligned so
7393 // that taking the difference between two stack areas will result in an
7394 // aligned stack.
7395 CallerReservedArea =
7396 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7397 FuncInfo->setMinReservedArea(CallerReservedArea);
7398
7399 if (isVarArg) {
7400 int VAListIndex = 0;
7401 // If any of the optional arguments are passed in register then the fixed
7402 // stack object we spill into is not immutable. Create a fixed stack object
7403 // that overlaps the remainder of the parameter save area.
7404 if (CCInfo.getStackSize() < (LinkageSize + MinParameterSaveArea)) {
7405 unsigned FixedStackSize =
7406 LinkageSize + MinParameterSaveArea - CCInfo.getStackSize();
7407 VAListIndex =
7408 MFI.CreateFixedObject(FixedStackSize, CCInfo.getStackSize(),
7409 /* IsImmutable */ false, /* IsAliased */ true);
7410 } else {
7411 // All the arguments passed through ellipses are on the stack. Create a
7412 // dummy fixed stack object the same size as a pointer since we don't
7413 // know the actual size.
7414 VAListIndex =
7415 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(),
7416 /* IsImmutable */ true, /* IsAliased */ true);
7417 }
7418
7419 FuncInfo->setVarArgsFrameIndex(VAListIndex);
7420 SDValue FIN = DAG.getFrameIndex(VAListIndex, PtrVT);
7421
7422 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7423 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7424
7425 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7426 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7427 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7428
7429 // The fixed integer arguments of a variadic function are stored to the
7430 // VarArgsFrameIndex on the stack so that they may be loaded by
7431 // dereferencing the result of va_next.
7432 for (unsigned
7433 GPRIndex = (CCInfo.getStackSize() - LinkageSize) / PtrByteSize,
7434 Offset = 0;
7435 GPRIndex < NumGPArgRegs; ++GPRIndex, Offset += PtrByteSize) {
7436
7437 const Register VReg =
7438 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7439 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7440
7441 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7442 MachinePointerInfo MPI =
7443 MachinePointerInfo::getFixedStack(MF, VAListIndex, Offset);
7444 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MPI);
7445 MemOps.push_back(Store);
7446 // Increment the address for the next argument to store.
7447 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7448 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7449 }
7450 }
7451
7452 if (!MemOps.empty())
7453 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7454
7455 return Chain;
7456}
7457
// Lower an outgoing call under the AIX ABI: classify the operands with
// CC_AIX, stage by-value aggregates into registers/stack, copy arguments
// into their assigned locations, save the TOC for indirect calls, and
// finish via FinishCall().
//
// NOTE(review): this copy of the file is missing a handful of lines (e.g.
// the Outs/DAG/InVals parameter lines and the ArgLocs/RegsToPass
// declarations); the annotations below describe the code as written here.
SDValue PPCTargetLowering::LowerCall_AIX(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
  // AIX ABI stack frame layout.

  assert((CFlags.CallConv == CallingConv::C ||
          CFlags.CallConv == CallingConv::Cold ||
          CFlags.CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (CFlags.IsPatchPoint)
    report_fatal_error("This call type is unimplemented on AIX.");

  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();

  MachineFunction &MF = DAG.getMachineFunction();
  CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage save area (LSA) on the stack.
  // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
  //   [SP][CR][LR][2 x reserved][TOC].
  // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  const bool IsPPC64 = Subtarget.isPPC64();
  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if the callee
  // is variadic.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed. As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
  const unsigned NumBytes = std::max<unsigned>(
      LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  SmallVector<SDValue, 8> MemOpChains;

  // Set up a copy of the stack pointer for loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
                                   : DAG.getRegister(PPC::R1, MVT::i32);

  // Walk the CCValAssign list; by-value and custom locations may consume
  // several consecutive entries, so I is advanced manually inside the loop.
  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
    const unsigned ValNo = ArgLocs[I].getValNo();
    SDValue Arg = OutVals[ValNo];
    ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;

    if (Flags.isByVal()) {
      const unsigned ByValSize = Flags.getByValSize();

      // Nothing to do for zero-sized ByVals on the caller side.
      if (!ByValSize) {
        ++I;
        continue;
      }

      // Zero-extending load of VT bytes from Arg + LoadOffset; used to move
      // by-value bytes into argument GPRs.
      auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
        return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
                              (LoadOffset != 0)
                                  ? DAG.getObjectPtrOffset(
                                        dl, Arg, TypeSize::getFixed(LoadOffset))
                                  : Arg,
                              MachinePointerInfo(), VT);
      };

      unsigned LoadOffset = 0;

      // Initialize registers, which are fully occupied by the by-val argument.
      while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
        SDValue Load = GetLoad(PtrVT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += PtrByteSize;
        const CCValAssign &ByValVA = ArgLocs[I++];
        assert(ByValVA.getValNo() == ValNo &&
               "Unexpected location for pass-by-value argument.");
        RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
      }

      if (LoadOffset == ByValSize)
        continue;

      // There must be one more loc to handle the remainder.
      assert(ArgLocs[I].getValNo() == ValNo &&
             "Expected additional location for by-value argument.");

      if (ArgLocs[I].isMemLoc()) {
        assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
        const CCValAssign &ByValVA = ArgLocs[I++];
        ISD::ArgFlagsTy MemcpyFlags = Flags;
        // Only memcpy the bytes that don't pass in register.
        MemcpyFlags.setByValSize(ByValSize - LoadOffset);
        // NOTE(review): the line that builds the destination pointer (an
        // objectPtrOffset off StackPtr) appears to be missing from this copy;
        // confirm against upstream before relying on this text.
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(
            (LoadOffset != 0) ? DAG.getObjectPtrOffset(
                                    dl, Arg, TypeSize::getFixed(LoadOffset))
                              : Arg,
                dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
            CallSeqStart, MemcpyFlags, DAG, dl);
        continue;
      }

      // Initialize the final register residue.
      // Any residue that occupies the final by-val arg register must be
      // left-justified on AIX. Loads must be a power-of-2 size and cannot be
      // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
      // 2 and 1 byte loads.
      const unsigned ResidueBytes = ByValSize % PtrByteSize;
      assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
             "Unexpected register residue for by-value argument.");
      SDValue ResidueVal;
      for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
        // Largest power-of-two chunk that still fits in the remainder.
        const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
        const MVT VT =
            N == 1 ? MVT::i8
                   : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
        SDValue Load = GetLoad(VT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += N;
        Bytes += N;

        // By-val arguments are passed left-justfied in register.
        // Every load here needs to be shifted, otherwise a full register load
        // should have been used.
        assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
               "Unexpected load emitted during handling of pass-by-value "
               "argument.");
        unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
        EVT ShiftAmountTy =
            getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
        SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
        SDValue ShiftedLoad =
            DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
        // OR each shifted chunk into the accumulated register value.
        ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
                                              ShiftedLoad)
                                : ShiftedLoad;
      }

      const CCValAssign &ByValVA = ArgLocs[I++];
      RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
      continue;
    }

    CCValAssign &VA = ArgLocs[I++];
    const MVT LocVT = VA.getLocVT();
    const MVT ValVT = VA.getValVT();

    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("Unexpected argument extension type.");
    case CCValAssign::Full:
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc() && !VA.needsCustom()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      continue;
    }

    // Vector arguments passed to VarArg functions need custom handling when
    // they are passed (at least partially) in GPRs.
    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
      assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
      // Store value to its stack slot.
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      SDValue Store =
          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
      MemOpChains.push_back(Store);
      const unsigned OriginalValNo = VA.getValNo();
      // Then load the GPRs from the stack
      unsigned LoadOffset = 0;
      // Consumes one custom RegLoc: reload a pointer-sized chunk of the
      // just-stored vector and forward it in the assigned GPR.
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != E && "Unexpected end of CCvalAssigns.");
        assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Expected custom RegLoc.");
        CCValAssign RegVA = ArgLocs[I++];
        assert(RegVA.getValNo() == OriginalValNo &&
               "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
        SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                  DAG.getConstant(LoadOffset, dl, PtrVT));
        SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
        LoadOffset += PtrByteSize;
      };

      // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
      // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
      // R10.
      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
          ArgLocs[I].getValNo() == OriginalValNo) {
        assert(!IsPPC64 &&
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();
      }

      continue;
    }

    if (VA.isMemLoc()) {
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      // NOTE(review): the MachinePointerInfo argument line of this store
      // appears to be missing from this copy; confirm against upstream.
      MemOpChains.push_back(
          DAG.getStore(Chain, dl, Arg, PtrOff,
                       Subtarget.getFrameLowering()->getStackAlign()));

      continue;
    }

    // NOTE(review): the report_fatal_error( line appears to be missing from
    // this copy between the condition and its message string.
    if (!ValVT.isFloatingPoint())
                         "Unexpected register handling for calling convention.");

    // Custom handling is used for GPR initializations for vararg float
    // arguments.
    assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
           LocVT.isInteger() &&
           "Custom register handling only expected for VarArg.");

    SDValue ArgAsInt =
        DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);

    if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
      // f32 in 32-bit GPR
      // f64 in 64-bit GPR
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
    else if (Arg.getValueType().getFixedSizeInBits() <
             LocVT.getFixedSizeInBits())
      // f32 in 64-bit GPR.
      RegsToPass.push_back(std::make_pair(
          VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
    else {
      // f64 in two 32-bit GPRs
      // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
      assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
             "Unexpected custom register for argument!");
      CCValAssign &GPR1 = VA;
      SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
                                     DAG.getConstant(32, dl, MVT::i8));
      RegsToPass.push_back(std::make_pair(
          GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));

      if (I != E) {
        // If only 1 GPR was available, there will only be one custom GPR and
        // the argument will also pass in memory.
        CCValAssign &PeekArg = ArgLocs[I];
        // NOTE(review): this condition compares PeekArg.getValNo() with
        // itself and is therefore always true; it presumably was meant to
        // compare against ValNo (the current argument) -- confirm and fix
        // upstream.
        if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
          assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
          CCValAssign &GPR2 = ArgLocs[I++];
          // Low 32 bits of the f64 go in the second (adjacent) GPR.
          RegsToPass.push_back(std::make_pair(
              GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // For indirect calls, we need to save the TOC base to the stack for
  // restoration after the call.
  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
    const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
    const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
    const MVT PtrVT = Subtarget.getScalarIntVT();
    const unsigned TOCSaveOffset =
        Subtarget.getFrameLowering()->getTOCSaveOffset();

    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (auto Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // No stack-pointer adjustment between frames is performed here (SPDiff = 0).
  const int SPDiff = 0;
  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
7780
// Query whether the given return values can be lowered for this calling
// convention, as decided by CCState::CheckReturn.
// NOTE(review): several declaration lines (the Outs parameter, the Context
// parameter, the RVLocs vector, and the RetCC_PPC_Cold arm of the ternary)
// are missing from this copy of the file; confirm against upstream.
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const Type *RetTy) const {
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  // Cold calls on SVR4 select a dedicated return-convention table; all
  // other cases use the common PPC return convention.
  return CCInfo.CheckReturn(
      Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                : RetCC_PPC);
}
7794
// Lower a function return: classify the return values with the appropriate
// return convention, copy each value into its assigned register (splitting
// SPE f64 returns into two i32 halves), and emit a PPCISD::RET_GLUE node.
// NOTE(review): the Outs parameter line, the RVLocs declaration, and the
// RetCC_PPC_Cold ternary arm are missing from this copy of the file.
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs,
                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                           : RetCC_PPC);

  SDValue Glue;
  // RetOps[0] holds the chain; registers (and finally the glue) follow.
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[RealResIdx];

    // Extend the value to its register's width as the convention requires.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      bool isLittleEndian = Subtarget.isLittleEndian();
      // Legalize ret f64 -> ret 2 x i32.
      SDValue SVal =
          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      // Extract the other 32-bit half for the second register.
      SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                         DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
      Glue = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
}
7859
7860SDValue
7861PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7862 SelectionDAG &DAG) const {
7863 SDLoc dl(Op);
7864
7865 // Get the correct type for integers.
7866 EVT IntVT = Op.getValueType();
7867
7868 // Get the inputs.
7869 SDValue Chain = Op.getOperand(0);
7870 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7871 // Build a DYNAREAOFFSET node.
7872 SDValue Ops[2] = {Chain, FPSIdx};
7873 SDVTList VTs = DAG.getVTList(IntVT);
7874 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7875}
7876
7877SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7878 SelectionDAG &DAG) const {
7879 // When we pop the dynamic allocation we need to restore the SP link.
7880 SDLoc dl(Op);
7881
7882 // Get the correct type for pointers.
7883 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7884
7885 // Construct the stack pointer operand.
7886 bool isPPC64 = Subtarget.isPPC64();
7887 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7888 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7889
7890 // Get the operands for the STACKRESTORE.
7891 SDValue Chain = Op.getOperand(0);
7892 SDValue SaveSP = Op.getOperand(1);
7893
7894 // Load the old link SP.
7895 SDValue LoadLinkSP =
7896 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7897
7898 // Restore the stack pointer.
7899 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7900
7901 // Store the old link SP.
7902 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7903}
7904
7905SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7906 MachineFunction &MF = DAG.getMachineFunction();
7907 bool isPPC64 = Subtarget.isPPC64();
7908 EVT PtrVT = getPointerTy(MF.getDataLayout());
7909
7910 // Get current frame pointer save index. The users of this index will be
7911 // primarily DYNALLOC instructions.
7912 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7913 int RASI = FI->getReturnAddrSaveIndex();
7914
7915 // If the frame pointer save index hasn't been defined yet.
7916 if (!RASI) {
7917 // Find out what the fix offset of the frame pointer save area.
7918 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7919 // Allocate the frame index for frame pointer save area.
7920 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7921 // Save the result.
7922 FI->setReturnAddrSaveIndex(RASI);
7923 }
7924 return DAG.getFrameIndex(RASI, PtrVT);
7925}
7926
7927SDValue
7928PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7929 MachineFunction &MF = DAG.getMachineFunction();
7930 bool isPPC64 = Subtarget.isPPC64();
7931 EVT PtrVT = getPointerTy(MF.getDataLayout());
7932
7933 // Get current frame pointer save index. The users of this index will be
7934 // primarily DYNALLOC instructions.
7935 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7936 int FPSI = FI->getFramePointerSaveIndex();
7937
7938 // If the frame pointer save index hasn't been defined yet.
7939 if (!FPSI) {
7940 // Find out what the fix offset of the frame pointer save area.
7941 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7942 // Allocate the frame index for frame pointer save area.
7943 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7944 // Save the result.
7945 FI->setFramePointerSaveIndex(FPSI);
7946 }
7947 return DAG.getFrameIndex(FPSI, PtrVT);
7948}
7949
7950SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7951 SelectionDAG &DAG) const {
7952 MachineFunction &MF = DAG.getMachineFunction();
7953 // Get the inputs.
7954 SDValue Chain = Op.getOperand(0);
7955 SDValue Size = Op.getOperand(1);
7956 SDLoc dl(Op);
7957
7958 // Get the correct type for pointers.
7959 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7960 // Negate the size.
7961 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7962 DAG.getConstant(0, dl, PtrVT), Size);
7963 // Construct a node for the frame pointer save index.
7964 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7965 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7966 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7967 if (hasInlineStackProbe(MF))
7968 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7969 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7970}
7971
7972SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7973 SelectionDAG &DAG) const {
7974 MachineFunction &MF = DAG.getMachineFunction();
7975
7976 bool isPPC64 = Subtarget.isPPC64();
7977 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7978
7979 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7980 return DAG.getFrameIndex(FI, PtrVT);
7981}
7982
7983SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7984 SelectionDAG &DAG) const {
7985 SDLoc DL(Op);
7986 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7987 DAG.getVTList(MVT::i32, MVT::Other),
7988 Op.getOperand(0), Op.getOperand(1));
7989}
7990
7991SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7992 SelectionDAG &DAG) const {
7993 SDLoc DL(Op);
7994 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7995 Op.getOperand(0), Op.getOperand(1));
7996}
7997
7998SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7999 if (Op.getValueType().isVector())
8000 return LowerVectorLoad(Op, DAG);
8001
8002 assert(Op.getValueType() == MVT::i1 &&
8003 "Custom lowering only for i1 loads");
8004
8005 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8006
8007 SDLoc dl(Op);
8008 LoadSDNode *LD = cast<LoadSDNode>(Op);
8009
8010 SDValue Chain = LD->getChain();
8011 SDValue BasePtr = LD->getBasePtr();
8012 MachineMemOperand *MMO = LD->getMemOperand();
8013
8014 SDValue NewLD =
8015 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8016 BasePtr, MVT::i8, MMO);
8017 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8018
8019 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8020 return DAG.getMergeValues(Ops, dl);
8021}
8022
// Lower an i1 store as a truncating i8 store (vector stores are dispatched
// to LowerVectorStore).
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to 32 bits, then use a truncating store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

  // NOTE(review): the line that rebuilds Value (the zero-extend described in
  // the comment above) appears to be missing from this copy of the file;
  // confirm against upstream before relying on this text.
                     Value);
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}
8044
8045// FIXME: Remove this once the ANDI glue bug is fixed:
8046SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8047 assert(Op.getValueType() == MVT::i1 &&
8048 "Custom lowering only for i1 results");
8049
8050 SDLoc DL(Op);
8051 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8052}
8053
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
                                               SelectionDAG &DAG) const {

  // Implements a vector truncate that fits in a vector register as a shuffle.
  // We want to legalize vector truncates down to where the source fits in
  // a vector register (and target is therefore smaller than vector register
  // size). At that point legalization will try to custom lower the sub-legal
  // result and get here - where we can contain the truncate as a single target
  // operation.

  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
  //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
  //
  // We will implement it for big-endian ordering as this (where x denotes
  // undefined):
  //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
  //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
  //
  // The same operation in little-endian ordering will be:
  //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
  //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>

  EVT TrgVT = Op.getValueType();
  assert(TrgVT.isVector() && "Vector type expected.");
  unsigned TrgNumElts = TrgVT.getVectorNumElements();
  EVT EltVT = TrgVT.getVectorElementType();
  // NOTE(review): one continuation line of this bail-out condition is
  // missing from this copy of the file; confirm against upstream.
  if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
      TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
    return SDValue();

  SDValue N1 = Op.getOperand(0);
  EVT SrcVT = N1.getValueType();
  unsigned SrcSize = SrcVT.getSizeInBits();
  // NOTE(review): continuation lines of this condition are missing from
  // this copy of the file; confirm against upstream.
  if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
    return SDValue();
  if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
    return SDValue();

  // The shuffle result is always a full 128-bit vector of the element type.
  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  SDLoc DL(Op);
  SDValue Op1, Op2;
  if (SrcSize == 256) {
    // Split a 256-bit source into two 128-bit halves for the shuffle.
    EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
    // NOTE(review): the initializer line of SplitVT is missing from this
    // copy of the file; confirm against upstream.
    EVT SplitVT =
    unsigned SplitNumElts = SplitVT.getVectorNumElements();
    Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
                      DAG.getConstant(0, DL, VecIdxTy));
    Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
                      DAG.getConstant(SplitNumElts, DL, VecIdxTy));
  }
  else {
    // Sources narrower than 128 bits are widened; the second operand is
    // left undefined.
    Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
    Op2 = DAG.getUNDEF(WideVT);
  }

  // First list the elements we want to keep.
  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
  SmallVector<int, 16> ShuffV;
  if (Subtarget.isLittleEndian())
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1);

  // Populate the remaining elements with undefs.
  // NOTE(review): these trailing lanes are documented as "undef", but the
  // index WideNumElts + 1 actually selects element 1 of Op2 (only truly
  // undefined when Op2 is UNDEF); confirm whether -1 (shuffle undef) was
  // intended here.
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    // ShuffV.push_back(i + WideNumElts);
    ShuffV.push_back(WideNumElts + 1);

  Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
  Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
  return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
}
8134
8135/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8136/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Operands of SELECT_CC: (LHS, RHS, TrueVal, FalseVal, CondCode).
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
  SDLoc dl(Op);

  // Without power9-vector, we don't have native instruction for f128 comparison.
  // Following transformation to libcall is needed for setcc:
  // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
    SDValue Z = DAG.getSetCC(
        dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
        LHS, RHS, CC);
    SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
    return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
  }

  // Not FP, or using SPE? Not a fsel.
  if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
      Subtarget.hasSPE())
    return Op;

  SDNodeFlags Flags = Op.getNode()->getFlags();

  // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
  // presence of infinities.
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
    switch (CC) {
    default:
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
      return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
    case ISD::SETOLT:
    case ISD::SETLT:
      return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
    }
  }

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  // With ISA 3.0
  if (!Flags.hasNoInfs() || !Flags.hasNoNaNs() || ResVT == MVT::f128)
    return Op;

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  // NOTE(review): the guard that checks RHS against 0.0 (described by the
  // comment above) is missing from this copy of the file; confirm against
  // upstream.
  switch (CC) {
  default: break; // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    [[fallthrough]];
  case ISD::SETEQ:
    if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    // Equality is implemented as two fsel's: LHS >= 0 and -LHS >= 0.
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
    [[fallthrough]];
  case ISD::SETOGE:
  case ISD::SETGE:
    if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
    [[fallthrough]];
  case ISD::SETOLE:
  case ISD::SETLE:
    if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
  }

  // General case: compare via an FSUB of the operands, then fsel on the
  // sign of the difference.
  SDValue Cmp;
  switch (CC) {
  default: break; // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    [[fallthrough]];
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  // Unhandled condition codes fall back to the generic expansion.
  return Op;
}
8265
8266static unsigned getPPCStrictOpcode(unsigned Opc) {
8267 switch (Opc) {
8268 default:
8269 llvm_unreachable("No strict version of this opcode!");
8270 case PPCISD::FCTIDZ:
8271 return PPCISD::STRICT_FCTIDZ;
8272 case PPCISD::FCTIWZ:
8273 return PPCISD::STRICT_FCTIWZ;
8274 case PPCISD::FCTIDUZ:
8275 return PPCISD::STRICT_FCTIDUZ;
8276 case PPCISD::FCTIWUZ:
8277 return PPCISD::STRICT_FCTIWUZ;
8278 case PPCISD::FCFID:
8279 return PPCISD::STRICT_FCFID;
8280 case PPCISD::FCFIDU:
8281 return PPCISD::STRICT_FCFIDU;
8282 case PPCISD::FCFIDS:
8283 return PPCISD::STRICT_FCFIDS;
8284 case PPCISD::FCFIDUS:
8285 return PPCISD::STRICT_FCFIDUS;
8286 }
8287}
8288
                              const PPCSubtarget &Subtarget) {
  SDLoc dl(Op);
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // For strict nodes, source is the second operand.
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  MVT DestTy = Op.getSimpleValueType();
  assert(Src.getValueType().isFloatingPoint() &&
         (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
          DestTy == MVT::i64) &&
         "Invalid FP_TO_INT types");
  // The FCTI* nodes used below take an f64 (or f128) input; widen f32 first,
  // threading the chain through the extend in the strict case.
  if (Src.getValueType() == MVT::f32) {
    if (IsStrict) {
      Src =
          DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
      Chain = Src.getValue(1);
    } else
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
  }
  // With P9 vector support, i8/i16 results are produced at GPR width and
  // truncated by the caller.
  if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
    DestTy = Subtarget.getScalarIntVT();
  unsigned Opc = ISD::DELETED_NODE;
  switch (DestTy.SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    // Without FPCVT there is no fctiwuz; an unsigned i32 result fits in a
    // signed i64 conversion instead.
    Opc = IsSigned ? PPCISD::FCTIWZ
                   : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
    break;
  case MVT::i64:
    assert((IsSigned || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
  }
  // The conversion result lives in an FP register: f128 stays f128, all
  // other results are an integer bit pattern carried in an f64.
  EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
  SDValue Conv;
  if (IsStrict) {
    Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
                       Flags);
  } else {
    Conv = DAG.getNode(Opc, dl, ConvTy, Src);
  }
  return Conv;
}
8342
/// Lower an FP-to-int conversion by performing the conversion in an FP
/// register and spilling the result to a stack slot. The caller re-loads the
/// integer from the location described by \p RLI (chain, pointer, pointer
/// info, alignment).
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  bool IsStrict = Op->isStrictFPOpcode();

  // Convert the FP value to an int value through memory.
  // STFIWX can store just the low 32 bits of the FPR, so for an i32 result
  // we only need a 4-byte slot; otherwise spill the full f64 bit pattern.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
                  (IsSigned || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =

  // Emit a store to the stack slot.
  SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
  Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    Alignment = Align(4);
    MachineMemOperand *MMO =
        MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
    SDValue Ops[] = { Chain, Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);

  // Result is a load from the stack slot. If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack &&
      !Subtarget.isLittleEndian()) {
    // The f64 spill holds the i32 result in its low word, which on
    // big-endian sits at byte offset 4.
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
  RLI.Alignment = Alignment;
}
8387
8388/// Custom lowers floating point to integer conversions to use
8389/// the direct move instructions available in ISA 2.07 to avoid the
8390/// need for load/store combinations.
8391SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8392 SelectionDAG &DAG,
8393 const SDLoc &dl) const {
8394 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8395 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8396 if (Op->isStrictFPOpcode())
8397 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8398 else
8399 return Mov;
8400}
8401
/// Lower (STRICT_)FP_TO_{S,U}INT. f128 sources are already legal with P9
/// vector support; ppcf128 -> i32 is expanded inline; everything else goes
/// through direct moves (ISA 2.07+) or a store/reload via the stack.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Op.getValueType();

  // FP to INT conversions are legal for f128.
  if (SrcVT == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (SrcVT == MVT::ppcf128) {
    if (DstVT == MVT::i32) {
      // TODO: Conservatively pass only nofpexcept flag here. Need to check and
      // set other fast-math flags to FP operations in both strict and
      // non-strict cases. (FP_TO_SINT, FSUB)
      SDNodeFlags Flags;
      Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

      if (IsSigned) {
        // Split the double-double into its two f64 halves.
        SDValue Lo, Hi;
        std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);

        // Add the two halves of the long double in round-to-zero mode, and use
        // a smaller FP_TO_SINT.
        if (IsStrict) {
          SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
                                    DAG.getVTList(MVT::f64, MVT::Other),
                                    {Op.getOperand(0), Lo, Hi}, Flags);
          return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                             DAG.getVTList(MVT::i32, MVT::Other),
                             {Res.getValue(1), Res}, Flags);
        } else {
          SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
          return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
        }
      } else {
        // Unsigned case: bias values >= 2^31 into signed range, convert,
        // then flip the sign bit back in the integer domain.
        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
        SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
        SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
        if (IsStrict) {
          // Sel = Src < 0x80000000
          // FltOfs = select Sel, 0.0, 0x80000000
          // IntOfs = select Sel, 0, 0x80000000
          // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
          SDValue Chain = Op.getOperand(0);
          EVT SetCCVT =
              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
          EVT DstSetCCVT =
              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
          SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                                     Chain, true);
          Chain = Sel.getValue(1);

          SDValue FltOfs = DAG.getSelect(
              dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
          Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);

          SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
                                    DAG.getVTList(SrcVT, MVT::Other),
                                    {Chain, Src, FltOfs}, Flags);
          Chain = Val.getValue(1);
          SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                                     DAG.getVTList(DstVT, MVT::Other),
                                     {Chain, Val}, Flags);
          Chain = SInt.getValue(1);
          SDValue IntOfs = DAG.getSelect(
              dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
          SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
          return DAG.getMergeValues({Result, Chain}, dl);
        } else {
          // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
          // FIXME: generated code sucks.
          SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
          True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
          True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
          SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
          return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
        }
      }
    }

    // Other ppcf128 destinations are left for the default expansion.
    return SDValue();
  }

  // With direct moves the whole conversion stays in registers.
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  // Otherwise convert through memory: store from an FPR, load into a GPR.
  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}
8501
// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (this last part is handled by makeEquivalentMemoryOrdering).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  // Conservatively skip reusing for constrained FP nodes.
  if (Op->isStrictFPOpcode())
    return false;

  SDLoc dl(Op);
  // FP_TO_UINT can only be lowered through the stack-slot path with FPCVT,
  // or for i32 results.
  bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
                       (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
  if (ET == ISD::NON_EXTLOAD &&
      (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    // The operand is itself an FP->int conversion: lower it through a stack
    // slot and reuse that slot's address.
    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  // If the result of the load is an illegal type, then we can't build a
  // valid chain for reuse since the legalised loads and token factor node that
  // ties the legalised loads together uses a different output chain then the
  // illegal load.
  if (!isTypeLegal(LD->getValueType(0)))
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    // Fold the pre-increment offset into the address we hand back.
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlign();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  // The load's chain result: value #1 normally, #2 for indexed loads.
  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}
8563
8564/// Analyze profitability of direct move
8565/// prefer float load to int load plus direct move
8566/// when there is no integer use of int load
8567bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8568 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8569 if (Origin->getOpcode() != ISD::LOAD)
8570 return true;
8571
8572 // If there is no LXSIBZX/LXSIHZX, like Power8,
8573 // prefer direct move if the memory size is 1 or 2 bytes.
8574 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8575 if (!Subtarget.hasP9Vector() &&
8576 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8577 return true;
8578
8579 for (SDUse &Use : Origin->uses()) {
8580
8581 // Only look at the users of the loaded value.
8582 if (Use.getResNo() != 0)
8583 continue;
8584
8585 SDNode *User = Use.getUser();
8586 if (User->getOpcode() != ISD::SINT_TO_FP &&
8587 User->getOpcode() != ISD::UINT_TO_FP &&
8588 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8589 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8590 return true;
8591 }
8592
8593 return false;
8594}
8595
                             const PPCSubtarget &Subtarget,
                             SDValue Chain = SDValue()) {
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  SDLoc dl(Op);

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
  unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
                              : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
  EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
  if (Op->isStrictFPOpcode()) {
    // Strict nodes thread a chain: default to the node's own incoming chain
    // unless the caller supplied one.
    if (!Chain)
      Chain = Op.getOperand(0);
    return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
                       DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
  } else
    return DAG.getNode(ConvOpc, dl, ConvTy, Src);
}
8621
8622/// Custom lowers integer to floating point conversions to use
8623/// the direct move instructions available in ISA 2.07 to avoid the
8624/// need for load/store combinations.
8625SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8626 SelectionDAG &DAG,
8627 const SDLoc &dl) const {
8628 assert((Op.getValueType() == MVT::f32 ||
8629 Op.getValueType() == MVT::f64) &&
8630 "Invalid floating point type as target of conversion");
8631 assert(Subtarget.hasFPCVT() &&
8632 "Int to FP conversions with direct moves require FPCVT");
8633 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8634 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8635 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8636 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8637 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8638 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8639 return convertIntToFP(Op, Mov, DAG, Subtarget);
8640}
8641
8642static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8643
8644 EVT VecVT = Vec.getValueType();
8645 assert(VecVT.isVector() && "Expected a vector type.");
8646 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8647
8648 EVT EltVT = VecVT.getVectorElementType();
8649 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8650 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8651
8652 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8653 SmallVector<SDValue, 16> Ops(NumConcat);
8654 Ops[0] = Vec;
8655 SDValue UndefVec = DAG.getUNDEF(VecVT);
8656 for (unsigned i = 1; i < NumConcat; ++i)
8657 Ops[i] = UndefVec;
8658
8659 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8660}
8661
/// Lower a vector int-to-fp conversion by widening the source to a full
/// 128-bit vector, shuffling the elements into the lanes the vector
/// conversion instruction expects, extending, and emitting the (now legal)
/// vector conversion on the result.
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned Opc = Op.getOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  SDValue Wide = widenVec(DAG, Src, dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  // Start with a mask selecting only the second shuffle operand ...
  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  // ... then drop the source elements into the lane positions required by
  // the target conversion, which differ by endianness.
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  // For unsigned conversions shuffle in zeros so the padding lanes are
  // already zero-extended; signed conversions sign-extend in-register below.
  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);

  SDValue Extend;
  if (SignedConv) {
    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
    EVT ExtVT = Src.getValueType();
    if (Subtarget.hasP9Altivec())
      ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
                               IntermediateVT.getVectorNumElements());

    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
                         DAG.getValueType(ExtVT));
  } else
    Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);

  // Emit the original conversion opcode on the extended vector.
  if (IsStrict)
    return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
                       {Op.getOperand(0), Extend}, Flags);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
8721
/// Lower (STRICT_){S,U}INT_TO_FP. Handles the vector case, f128 legality,
/// the i1 special case, direct moves (ISA 2.07), and the classic
/// store-to-stack + LFIWAX/LFIWZX/LFD + FCFID* sequences.
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (Op.getValueType() == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  // i1 converts exactly to 0.0 or 1.0; just select between the constants.
  if (Src.getValueType() == MVT::i1) {
    SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
                              DAG.getConstantFP(1.0, dl, Op.getValueType()),
                              DAG.getConstantFP(0.0, dl, Op.getValueType()));
    if (IsStrict)
      return DAG.getMergeValues({Sel, Chain}, dl);
    else
      return Sel;
  }

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((IsSigned || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    SDValue SINT = Src;
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if afn is in effect, accept double
    // rounding to avoid the extra overhead.
    // FIXME: Currently INT_TO_FP can't support fast math flags because
    // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
    // false.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
        !Op->getFlags().hasApproximateFuncs()) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
                          DAG.getSignedConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(
          dl,
          getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    // Try progressively narrower ways of getting the integer bits into an
    // FPR: reuse an existing i64 load, an extending i32 load via
    // LFIWAX/LFIWZX, a spilled extension operand, or a plain bitcast.
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      // The input is an extended i32: spill the narrow value and let the
      // extending FP load do the extension.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
                        PPCISD::LFIWZX : PPCISD::LFIWAX,
                        dl, DAG.getVTList(MVT::f64, MVT::Other),
                        Ops, MVT::i32, MMO);
      Chain = Bits.getValue(1);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
    if (IsStrict)
      Chain = FP.getValue(1);

    // Without FCFIDS the conversion produced an f64; round it to f32.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      if (IsStrict)
        FP = DAG.getNode(
            ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
            {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
            Flags);
      else
        FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                         DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
    }
    return FP;
  }

  assert(Src.getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    // Load the i32 directly into an FPR with LFIWAX/LFIWZX, reusing an
    // existing load's address when possible.
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
            RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
                                 DAG.getVTList(MVT::f64, MVT::Other), Ops,
                                 MVT::i32, MMO);
    Chain = Ld.getValue(1);
    if (ReusingLoad && RLI.ResChain) {
      DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
    }
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        Chain, dl, Ext64, FIdx,
    Chain = Store;

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Chain, FIdx,
    Chain = Ld.getValue(1);
  }

  // FCFID it and return it.
  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
  if (IsStrict)
    Chain = FP.getValue(1);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
    if (IsStrict)
      FP = DAG.getNode(
          ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
          {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
    else
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
  }
  return FP;
}
8986
/// Lower SET_ROUNDING by rewriting the rounding-control (RN) field of the
/// FPSCR. LLVM and PowerPC encode rounding modes differently; the two-bit
/// value is translated with x ^ (~(x >> 1) & 1).
SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc Dl(Op);
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());
  SDValue Chain = Op.getOperand(0);

  // If requested mode is constant, just use simpler mtfsb/mffscrni
  if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    uint64_t Mode = CVal->getZExtValue();
    assert(Mode < 4 && "Unsupported rounding mode!");
    // Translate the constant to the Power RN encoding at compile time.
    unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
    if (Subtarget.isISA3_0())
      return SDValue(
          DAG.getMachineNode(
              PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
              {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
          1);
    // Pre-ISA3.0: set/clear the two RN bits individually with mtfsb0/mtfsb1.
    SDNode *SetHi = DAG.getMachineNode(
        (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
        {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
    SDNode *SetLo = DAG.getMachineNode(
        (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
        {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
    return SDValue(SetLo, 0);
  }

  // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
  SDValue One = DAG.getConstant(1, Dl, MVT::i32);
  SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
                                DAG.getConstant(3, Dl, MVT::i32));
  SDValue DstFlag = DAG.getNode(
      ISD::XOR, Dl, MVT::i32, SrcFlag,
      DAG.getNode(ISD::AND, Dl, MVT::i32,
                  DAG.getNOT(Dl,
                             DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
                             MVT::i32),
                  One));
  // For Power9, there's faster mffscrn, and we don't need to read FPSCR
  SDValue MFFS;
  if (!Subtarget.isISA3_0()) {
    MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
    Chain = MFFS.getValue(1);
  }
  SDValue NewFPSCR;
  if (Subtarget.isPPC64()) {
    if (Subtarget.isISA3_0()) {
      NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
    } else {
      // Set the last two bits (rounding mode) of bitcasted FPSCR.
      SDNode *InsertRN = DAG.getMachineNode(
          PPC::RLDIMI, Dl, MVT::i64,
          {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
           DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
           DAG.getTargetConstant(0, Dl, MVT::i32),
           DAG.getTargetConstant(62, Dl, MVT::i32)});
      NewFPSCR = SDValue(InsertRN, 0);
    }
    NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
  } else {
    // In 32-bit mode, store f64, load and update the lower half.
    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    // Addr points at the word holding the RN bits (high address on BE).
    SDValue Addr = Subtarget.isLittleEndian()
                       ? StackSlot
                       : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
                                     DAG.getConstant(4, Dl, PtrVT));
    if (Subtarget.isISA3_0()) {
      Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
    } else {
      // Merge the new RN bits into the loaded FPSCR word with rlwimi.
      Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
      SDValue Tmp =
          DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
      Chain = Tmp.getValue(1);
      Tmp = SDValue(DAG.getMachineNode(
                        PPC::RLWIMI, Dl, MVT::i32,
                        {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
                         DAG.getTargetConstant(30, Dl, MVT::i32),
                         DAG.getTargetConstant(31, Dl, MVT::i32)}),
                    0);
      Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
    }
    NewFPSCR =
        DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
    Chain = NewFPSCR.getValue(1);
  }
  // Commit the new FPSCR value: mffscrn on ISA3.0, mtfsf otherwise.
  if (Subtarget.isISA3_0())
    return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
                                      {NewFPSCR, Chain}),
                   1);
  SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
  SDNode *MTFSF = DAG.getMachineNode(
      PPC::MTFSF, Dl, MVT::Other,
      {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
  return SDValue(MTFSF, 0);
}
9083
SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  GET_ROUNDING, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  SDValue Chain = Op.getOperand(0);
  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
  Chain = MFFS.getValue(1);

  SDValue CWD;
  if (isTypeLegal(MVT::i64)) {
    // When i64 is legal, read the FPSCR image directly: bitcast the f64
    // result of MFFS to i64 and truncate to the low 32 bits.
    CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                      DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
  } else {
    // Save FP register to stack slot
    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());

    // Load FP Control Word from low 32 bits of stack slot.
           "Stack slot adjustment is valid only on big endian subtargets!");
    SDValue Four = DAG.getConstant(4, dl, PtrVT);
    SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
    CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
    Chain = CWD.getValue(1);
  }

  // Transform as necessary
  // CWD1 = FPSCR & 3 (the raw two-bit rounding-mode field).
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  // CWD2 = (~FPSCR & 3) >> 1, per the conversion formula above.
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  RetVal =
               dl, VT, RetVal);

  return DAG.getMergeValues({RetVal, Chain}, dl);
}
9155
9156SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9157 EVT VT = Op.getValueType();
9158 uint64_t BitWidth = VT.getSizeInBits();
9159 SDLoc dl(Op);
9160 assert(Op.getNumOperands() == 3 &&
9161 VT == Op.getOperand(1).getValueType() &&
9162 "Unexpected SHL!");
9163
9164 // Expand into a bunch of logical ops. Note that these ops
9165 // depend on the PPC behavior for oversized shift amounts.
9166 SDValue Lo = Op.getOperand(0);
9167 SDValue Hi = Op.getOperand(1);
9168 SDValue Amt = Op.getOperand(2);
9169 EVT AmtVT = Amt.getValueType();
9170
9171 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9172 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9173 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9174 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9175 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9176 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9177 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9178 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9179 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9180 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9181 SDValue OutOps[] = { OutLo, OutHi };
9182 return DAG.getMergeValues(OutOps, dl);
9183}
9184
9185SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9186 EVT VT = Op.getValueType();
9187 SDLoc dl(Op);
9188 uint64_t BitWidth = VT.getSizeInBits();
9189 assert(Op.getNumOperands() == 3 &&
9190 VT == Op.getOperand(1).getValueType() &&
9191 "Unexpected SRL!");
9192
9193 // Expand into a bunch of logical ops. Note that these ops
9194 // depend on the PPC behavior for oversized shift amounts.
9195 SDValue Lo = Op.getOperand(0);
9196 SDValue Hi = Op.getOperand(1);
9197 SDValue Amt = Op.getOperand(2);
9198 EVT AmtVT = Amt.getValueType();
9199
9200 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9201 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9202 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9203 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9204 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9205 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9206 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9207 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9208 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9209 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9210 SDValue OutOps[] = { OutLo, OutHi };
9211 return DAG.getMergeValues(OutOps, dl);
9212}
9213
9214SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9215 SDLoc dl(Op);
9216 EVT VT = Op.getValueType();
9217 uint64_t BitWidth = VT.getSizeInBits();
9218 assert(Op.getNumOperands() == 3 &&
9219 VT == Op.getOperand(1).getValueType() &&
9220 "Unexpected SRA!");
9221
9222 // Expand into a bunch of logical ops, followed by a select_cc.
9223 SDValue Lo = Op.getOperand(0);
9224 SDValue Hi = Op.getOperand(1);
9225 SDValue Amt = Op.getOperand(2);
9226 EVT AmtVT = Amt.getValueType();
9227
9228 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9229 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9230 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9231 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9232 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9233 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9234 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9235 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9236 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9237 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9238 Tmp4, Tmp6, ISD::SETLE);
9239 SDValue OutOps[] = { OutLo, OutHi };
9240 return DAG.getMergeValues(OutOps, dl);
9241}
9242
9243SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9244 SelectionDAG &DAG) const {
9245 SDLoc dl(Op);
9246 EVT VT = Op.getValueType();
9247 unsigned BitWidth = VT.getSizeInBits();
9248
9249 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9250 SDValue X = Op.getOperand(0);
9251 SDValue Y = Op.getOperand(1);
9252 SDValue Z = Op.getOperand(2);
9253 EVT AmtVT = Z.getValueType();
9254
9255 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9256 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9257 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9258 // on PowerPC shift by BW being well defined.
9259 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9260 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9261 SDValue SubZ =
9262 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9263 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9264 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9265 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9266}
9267
9268//===----------------------------------------------------------------------===//
9269// Vector related lowering.
9270//
9271
9272/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9273/// element size of SplatSize. Cast the result to VT.
9274static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9275 SelectionDAG &DAG, const SDLoc &dl) {
9276 static const MVT VTys[] = { // canonical VT to use for each size.
9277 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9278 };
9279
9280 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9281
9282 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9283 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9284 SplatSize = 1;
9285 Val = 0xFF;
9286 }
9287
9288 EVT CanonicalVT = VTys[SplatSize-1];
9289
9290 // Build a canonical splat for this value.
9291 // Explicitly truncate APInt here, as this API is used with a mix of
9292 // signed and unsigned values.
9293 return DAG.getBitcast(
9294 ReqVT,
9295 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9296}
9297
/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
/// A DestVT of MVT::Other means the result takes the operand's own type.
                                const SDLoc &dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  // Chainless intrinsic: operand 0 is the intrinsic ID constant.
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op);
}
9306
/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
/// A DestVT of MVT::Other means the result takes LHS's own type.
                                SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  // Chainless intrinsic: operand 0 is the intrinsic ID constant.
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}
9316
9317/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9318/// specified intrinsic ID.
9319static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9320 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9321 EVT DestVT = MVT::Other) {
9322 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9323 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9324 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9325}
9326
9327/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9328/// amount. The result has the specified value type.
9329static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9330 SelectionDAG &DAG, const SDLoc &dl) {
9331 // Force LHS/RHS to be the right type.
9332 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9333 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9334
9335 int Ops[16];
9336 for (unsigned i = 0; i != 16; ++i)
9337 Ops[i] = i + Amt;
9338 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9339 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9340}
9341
/// Do we have an efficient pattern in a .td file for this node?
///
/// \param V - pointer to the BuildVectorSDNode being matched
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
///
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
/// the opposite is true (expansion is beneficial) are:
/// - The node builds a vector out of integers that are not 32 or 64-bits
/// - The node builds a vector out of constants
/// - The node is a "load-and-splat"
/// In all other cases, we will choose to keep the BUILD_VECTOR.
                                            bool HasDirectMove,
                                            bool HasP8Vector) {
  EVT VecVT = V->getValueType(0);
  // Only these element types have efficient .td patterns; the integer ones
  // additionally require direct moves, and v4f32 requires P8 vector support.
  bool RightType = VecVT == MVT::v2f64 ||
    (HasP8Vector && VecVT == MVT::v4f32) ||
    (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
  if (!RightType)
    return false;

  bool IsSplat = true;
  bool IsLoad = false;
  SDValue Op0 = V->getOperand(0);

  // This function is called in a block that confirms the node is not a constant
  // splat. So a constant BUILD_VECTOR here means the vector is built out of
  // different constants.
  if (V->isConstant())
    return false;
  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
    if (V->getOperand(i).isUndef())
      return false;
    // We want to expand nodes that represent load-and-splat even if the
    // loaded value is a floating point truncation or conversion to int.
    if (V->getOperand(i).getOpcode() == ISD::LOAD ||
        (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
      IsLoad = true;
    // If the operands are different or the input is not a load and has more
    // uses than just this BV node, then it isn't a splat.
    if (V->getOperand(i) != Op0 ||
        (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
      IsSplat = false;
  }
  // Expansion is beneficial only for a load-and-splat; keep the BUILD_VECTOR
  // in every other case.
  return !(IsSplat && IsLoad);
}
9394
9395// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9396SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9397
9398 SDLoc dl(Op);
9399 SDValue Op0 = Op->getOperand(0);
9400
9401 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9402 (Op.getValueType() != MVT::f128))
9403 return SDValue();
9404
9405 SDValue Lo = Op0.getOperand(0);
9406 SDValue Hi = Op0.getOperand(1);
9407 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9408 return SDValue();
9409
9410 if (!Subtarget.isLittleEndian())
9411 std::swap(Lo, Hi);
9412
9413 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9414}
9415
9416static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9417 const SDValue *InputLoad = &Op;
9418 while (InputLoad->getOpcode() == ISD::BITCAST)
9419 InputLoad = &InputLoad->getOperand(0);
9420 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9421 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9422 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9423 InputLoad = &InputLoad->getOperand(0);
9424 }
9425 if (InputLoad->getOpcode() != ISD::LOAD)
9426 return nullptr;
9427 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9428 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9429}
9430
// Convert the argument APFloat to a single precision APFloat if there is no
// loss in information during the conversion to single precision APFloat and the
// resulting number is not a denormal number. Return true if successful.
  // Work on a copy so the argument is left untouched on failure.
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
    &LosesInfo);
  // Succeed only for an exact, non-denormal single-precision value.
  bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
  if (Success)
    ArgAPFloat = APFloatToConvert;
  return Success;
}
9444
// Bitcast the argument APInt to a double and convert it to a single precision
// APFloat, bitcast the APFloat to an APInt and assign it to the original
// argument if there is no loss in information during the conversion from
// double to single precision APFloat and the resulting number is not a denormal
// number. Return true if successful.
  // Reinterpret the 64-bit pattern as a double.
  double DpValue = ArgAPInt.bitsToDouble();
  APFloat APFloatDp(DpValue);
  // Try the lossless, non-denormal narrowing to single precision.
  bool Success = convertToNonDenormSingle(APFloatDp);
  // On success APFloatDp now holds the single-precision value; hand its bit
  // pattern back through the argument.
  if (Success)
    ArgAPInt = APFloatDp.bitcastToAPInt();
  return Success;
}
9458
// Nondestructive check for convertToNonDenormSingle.
// Returns true when the value converts exactly to a non-denormal single.
  // Only convert if it loses info, since XXSPLTIDP should
  // handle the other case.
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
    &LosesInfo);

  return (!LosesInfo && !APFloatToConvert.isDenormal());
}
9470
9471static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9472 unsigned &Opcode) {
9473 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9474 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9475 return false;
9476
9477 EVT Ty = Op->getValueType(0);
9478 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9479 // as we cannot handle extending loads for these types.
9480 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9481 ISD::isNON_EXTLoad(InputNode))
9482 return true;
9483
9484 EVT MemVT = InputNode->getMemoryVT();
9485 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9486 // memory VT is the same vector element VT type.
9487 // The loads feeding into the v8i16 and v16i8 types will be extending because
9488 // scalar i8/i16 are not legal types.
9489 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9490 (MemVT == Ty.getVectorElementType()))
9491 return true;
9492
9493 if (Ty == MVT::v2i64) {
9494 // Check the extend type, when the input type is i32, and the output vector
9495 // type is v2i64.
9496 if (MemVT == MVT::i32) {
9497 if (ISD::isZEXTLoad(InputNode))
9498 Opcode = PPCISD::ZEXT_LD_SPLAT;
9499 if (ISD::isSEXTLoad(InputNode))
9500 Opcode = PPCISD::SEXT_LD_SPLAT;
9501 }
9502 return true;
9503 }
9504 return false;
9505}
9506
9508 bool IsLittleEndian) {
9509 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9510
9511 BitMask.clearAllBits();
9512 EVT VT = BVN.getValueType(0);
9513 unsigned VTSize = VT.getSizeInBits();
9514 APInt ConstValue(VTSize, 0);
9515
9516 unsigned EltWidth = VT.getScalarSizeInBits();
9517
9518 unsigned BitPos = 0;
9519 for (auto OpVal : BVN.op_values()) {
9520 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9521
9522 if (!CN)
9523 return false;
9524 // The elements in a vector register are ordered in reverse byte order
9525 // between little-endian and big-endian modes.
9526 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9527 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9528 BitPos += EltWidth;
9529 }
9530
9531 for (unsigned J = 0; J < 16; ++J) {
9532 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9533 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9534 return false;
9535 if (ExtractValue == 0xFF)
9536 BitMask.setBit(J);
9537 }
9538 return true;
9539}
9540
9541// If this is a case we can't handle, return null and let the default
9542// expansion code take care of it. If we CAN select this case, and if it
9543// selects to a single instruction, return Op. Otherwise, if we can codegen
9544// this case more efficiently than a constant pool load, lower it to the
9545// sequence of ops that should be used.
9546SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9547 SelectionDAG &DAG) const {
9548 SDLoc dl(Op);
9549 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9550 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9551
9552 if (Subtarget.hasP10Vector()) {
9553 APInt BitMask(32, 0);
9554 // If the value of the vector is all zeros or all ones,
9555 // we do not convert it to MTVSRBMI.
9556 // The xxleqv instruction sets a vector with all ones.
9557 // The xxlxor instruction sets a vector with all zeros.
9558 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9559 BitMask != 0 && BitMask != 0xffff) {
9560 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9561 MachineSDNode *MSDNode =
9562 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9563 SDValue SDV = SDValue(MSDNode, 0);
9564 EVT DVT = BVN->getValueType(0);
9565 EVT SVT = SDV.getValueType();
9566 if (SVT != DVT) {
9567 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9568 }
9569 return SDV;
9570 }
9571 // Recognize build vector patterns to emit VSX vector instructions
9572 // instead of loading value from memory.
9573 if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9574 return VecPat;
9575 }
9576 // Check if this is a splat of a constant value.
9577 APInt APSplatBits, APSplatUndef;
9578 unsigned SplatBitSize;
9579 bool HasAnyUndefs;
9580 bool BVNIsConstantSplat =
9581 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9582 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9583
9584 // If it is a splat of a double, check if we can shrink it to a 32 bit
9585 // non-denormal float which when converted back to double gives us the same
9586 // double. This is to exploit the XXSPLTIDP instruction.
9587 // If we lose precision, we use XXSPLTI32DX.
9588 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9589 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9590 // Check the type first to short-circuit so we don't modify APSplatBits if
9591 // this block isn't executed.
9592 if ((Op->getValueType(0) == MVT::v2f64) &&
9593 convertToNonDenormSingle(APSplatBits)) {
9594 SDValue SplatNode = DAG.getNode(
9595 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9596 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9597 return DAG.getBitcast(Op.getValueType(), SplatNode);
9598 } else {
9599 // We may lose precision, so we have to use XXSPLTI32DX.
9600
9601 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9602 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9603 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9604
9605 if (!Hi || !Lo)
9606 // If either load is 0, then we should generate XXLXOR to set to 0.
9607 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9608
9609 if (Hi)
9610 SplatNode = DAG.getNode(
9611 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9612 DAG.getTargetConstant(0, dl, MVT::i32),
9613 DAG.getTargetConstant(Hi, dl, MVT::i32));
9614
9615 if (Lo)
9616 SplatNode =
9617 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9618 DAG.getTargetConstant(1, dl, MVT::i32),
9619 DAG.getTargetConstant(Lo, dl, MVT::i32));
9620
9621 return DAG.getBitcast(Op.getValueType(), SplatNode);
9622 }
9623 }
9624
9625 bool IsSplat64 = false;
9626 uint64_t SplatBits = 0;
9627 int32_t SextVal = 0;
9628 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9629 SplatBits = APSplatBits.getZExtValue();
9630 if (SplatBitSize <= 32) {
9631 SextVal = SignExtend32(SplatBits, SplatBitSize);
9632 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9633 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9634 bool P9Vector = Subtarget.hasP9Vector();
9635 int32_t Hi = P9Vector ? 127 : 15;
9636 int32_t Lo = P9Vector ? -128 : -16;
9637 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9638 SextVal = static_cast<int32_t>(SplatBits);
9639 }
9640 }
9641
9642 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9643 unsigned NewOpcode = PPCISD::LD_SPLAT;
9644
9645 // Handle load-and-splat patterns as we have instructions that will do this
9646 // in one go.
9647 if (DAG.isSplatValue(Op, true) &&
9648 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9649 const SDValue *InputLoad = &Op.getOperand(0);
9650 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9651
9652 // If the input load is an extending load, it will be an i32 -> i64
9653 // extending load and isValidSplatLoad() will update NewOpcode.
9654 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9655 unsigned ElementSize =
9656 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9657
9658 assert(((ElementSize == 2 * MemorySize)
9659 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9660 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9661 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9662 "Unmatched element size and opcode!\n");
9663
9664 // Checking for a single use of this load, we have to check for vector
9665 // width (128 bits) / ElementSize uses (since each operand of the
9666 // BUILD_VECTOR is a separate use of the value.
9667 unsigned NumUsesOfInputLD = 128 / ElementSize;
9668 for (SDValue BVInOp : Op->ops())
9669 if (BVInOp.isUndef())
9670 NumUsesOfInputLD--;
9671
9672 // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
9673 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9674 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9675 // 15", but function IsValidSplatLoad() now will only return true when
9676 // the data at index 0 is not nullptr. So we will not get into trouble for
9677 // these cases.
9678 //
9679 // case 1 - lfiwzx/lfiwax
9680 // 1.1: load result is i32 and is sign/zero extend to i64;
9681 // 1.2: build a v2i64 vector type with above loaded value;
9682 // 1.3: the vector has only one value at index 0, others are all undef;
9683 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9684 if (NumUsesOfInputLD == 1 &&
9685 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9686 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9687 Subtarget.hasLFIWAX()))
9688 return SDValue();
9689
9690 // case 2 - lxvr[hb]x
9691 // 2.1: load result is at most i16;
9692 // 2.2: build a vector with above loaded value;
9693 // 2.3: the vector has only one value at index 0, others are all undef;
9694 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9695 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9696 Subtarget.isISA3_1() && ElementSize <= 16)
9697 return SDValue();
9698
9699 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9700 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9701 Subtarget.hasVSX()) {
9702 SDValue Ops[] = {
9703 LD->getChain(), // Chain
9704 LD->getBasePtr(), // Ptr
9705 DAG.getValueType(Op.getValueType()) // VT
9706 };
9707 SDValue LdSplt = DAG.getMemIntrinsicNode(
9708 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9709 LD->getMemoryVT(), LD->getMemOperand());
9710 // Replace all uses of the output chain of the original load with the
9711 // output chain of the new load.
9712 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9713 LdSplt.getValue(1));
9714 return LdSplt;
9715 }
9716 }
9717
9718 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9719 // 32-bits can be lowered to VSX instructions under certain conditions.
9720 // Without VSX, there is no pattern more efficient than expanding the node.
9721 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9722 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9723 Subtarget.hasP8Vector()))
9724 return Op;
9725 return SDValue();
9726 }
9727
9728 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9729 unsigned SplatSize = SplatBitSize / 8;
9730
9731 // First, handle single instruction cases.
9732
9733 // All zeros?
9734 if (SplatBits == 0) {
9735 // Canonicalize all zero vectors to be v4i32.
9736 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9737 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9738 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9739 }
9740 return Op;
9741 }
9742
9743 // We have XXSPLTIW for constant splats four bytes wide.
9744 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9745 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9746 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9747 // turned into a 4-byte splat of 0xABABABAB.
9748 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9749 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9750 Op.getValueType(), DAG, dl);
9751
9752 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9753 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9754 dl);
9755
9756 // We have XXSPLTIB for constant splats one byte wide.
9757 if (Subtarget.hasP9Vector() && SplatSize == 1)
9758 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9759 dl);
9760
9761 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9762 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9763 if (SextVal >= -16 && SextVal <= 15) {
9764 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9765 // generate a splat word with extend for size 8.
9766 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9767 SDValue Res =
9768 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9769 if (SplatSize != 8)
9770 return Res;
9771 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
9772 }
9773
9774 // Two instruction sequences.
9775
9776 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9777 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9779 SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9780 unsigned IID;
9781 EVT VT;
9782 switch (SplatSize) {
9783 default:
9784 llvm_unreachable("Unexpected type for vector constant.");
9785 case 2:
9786 IID = Intrinsic::ppc_altivec_vupklsb;
9787 VT = MVT::v8i16;
9788 break;
9789 case 4:
9790 IID = Intrinsic::ppc_altivec_vextsb2w;
9791 VT = MVT::v4i32;
9792 break;
9793 case 8:
9794 IID = Intrinsic::ppc_altivec_vextsb2d;
9795 VT = MVT::v2i64;
9796 break;
9797 }
9798 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9799 return DAG.getBitcast(Op->getValueType(0), Extend);
9800 }
9801 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9802
9803 // If this value is in the range [-32,30] and is even, use:
9804 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9805 // If this value is in the range [17,31] and is odd, use:
9806 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9807 // If this value is in the range [-31,-17] and is odd, use:
9808 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9809 // Note the last two are three-instruction sequences.
9810 if (SextVal >= -32 && SextVal <= 31) {
9811 // To avoid having these optimizations undone by constant folding,
9812 // we convert to a pseudo that will be expanded later into one of
9813 // the above forms.
9814 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9815 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9816 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9817 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9818 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9819 if (VT == Op.getValueType())
9820 return RetVal;
9821 else
9822 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9823 }
9824
9825 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9826 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9827 // for fneg/fabs.
9828 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9829 // Make -1 and vspltisw -1:
9830 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9831
9832 // Make the VSLW intrinsic, computing 0x8000_0000.
9833 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9834 OnesV, DAG, dl);
9835
9836 // xor by OnesV to invert it.
9837 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9838 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9839 }
9840
9841 // Check to see if this is a wide variety of vsplti*, binop self cases.
9842 static const signed char SplatCsts[] = {
9843 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9844 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9845 };
9846
9847 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9848 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9849 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
9850 int i = SplatCsts[idx];
9851
9852 // Figure out what shift amount will be used by altivec if shifted by i in
9853 // this splat size.
9854 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9855
9856 // vsplti + shl self.
9857 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9858 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9859 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9860 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9861 Intrinsic::ppc_altivec_vslw
9862 };
9863 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9864 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9865 }
9866
9867 // vsplti + srl self.
9868 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9869 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9870 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9871 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9872 Intrinsic::ppc_altivec_vsrw
9873 };
9874 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9875 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9876 }
9877
9878 // vsplti + rol self.
9879 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9880 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9881 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9882 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9883 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9884 Intrinsic::ppc_altivec_vrlw
9885 };
9886 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9887 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9888 }
9889
9890 // t = vsplti c, result = vsldoi t, t, 1
9891 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9892 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9893 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9894 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9895 }
9896 // t = vsplti c, result = vsldoi t, t, 2
9897 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9898 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9899 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9900 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9901 }
9902 // t = vsplti c, result = vsldoi t, t, 3
9903 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9904 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9905 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9906 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9907 }
9908 }
9909
9910 return SDValue();
9911}
9912
9913/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9914/// the specified operations to build the shuffle.
9916 SDValue RHS, SelectionDAG &DAG,
9917 const SDLoc &dl) {
9918 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9919 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9920 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9921
9922 enum {
9923 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9924 OP_VMRGHW,
9925 OP_VMRGLW,
9926 OP_VSPLTISW0,
9927 OP_VSPLTISW1,
9928 OP_VSPLTISW2,
9929 OP_VSPLTISW3,
9930 OP_VSLDOI4,
9931 OP_VSLDOI8,
9932 OP_VSLDOI12
9933 };
9934
9935 if (OpNum == OP_COPY) {
9936 if (LHSID == (1*9+2)*9+3) return LHS;
9937 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9938 return RHS;
9939 }
9940
9941 SDValue OpLHS, OpRHS;
9942 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9943 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9944
9945 int ShufIdxs[16];
9946 switch (OpNum) {
9947 default: llvm_unreachable("Unknown i32 permute!");
9948 case OP_VMRGHW:
9949 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9950 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9951 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9952 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9953 break;
9954 case OP_VMRGLW:
9955 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9956 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9957 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9958 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9959 break;
9960 case OP_VSPLTISW0:
9961 for (unsigned i = 0; i != 16; ++i)
9962 ShufIdxs[i] = (i&3)+0;
9963 break;
9964 case OP_VSPLTISW1:
9965 for (unsigned i = 0; i != 16; ++i)
9966 ShufIdxs[i] = (i&3)+4;
9967 break;
9968 case OP_VSPLTISW2:
9969 for (unsigned i = 0; i != 16; ++i)
9970 ShufIdxs[i] = (i&3)+8;
9971 break;
9972 case OP_VSPLTISW3:
9973 for (unsigned i = 0; i != 16; ++i)
9974 ShufIdxs[i] = (i&3)+12;
9975 break;
9976 case OP_VSLDOI4:
9977 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9978 case OP_VSLDOI8:
9979 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9980 case OP_VSLDOI12:
9981 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9982 }
9983 EVT VT = OpLHS.getValueType();
9984 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9985 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9986 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9987 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9988}
9989
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
///
/// VINSERTB inserts one byte of a source vector into a fixed byte of the
/// target vector, so we look for masks that are the identity permutation
/// except for a single byte taken from the other operand.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  // Indexed by the (mod-16) source byte index; VINSERTB always reads its
  // source from byte element 7, so the source vector must be rotated first.
  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for element 7 in the
    // Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we need the rest of the
      // Mask to be from V2 [16,31] and vice versa. Unless the 2nd operand
      // is undefined, in which case we always assume we're picking from the
      // 1st operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    // Rotate the source so the byte we want lands in element 7 before the
    // insert.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
10090
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
///
/// VINSERTH inserts one half-word of a source vector into a fixed half-word
/// of the target vector, so we look for masks that are the identity
/// permutation except for a single half-word taken from the other operand.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  // Indexed by the (mod-8) source half-word index; VINSERTH always reads its
  // source from half-word element 3, so the source must be rotated first.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  // Identity orders for the LHS ([0,7]) and RHS ([8,15]) operands, packed
  // as one 4-bit nibble per half-word (most-significant nibble = element 0).
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa. Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    // All-ones except the nibble under test: used to compare the remaining
    // elements against the identity order in one go.
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
10202
10203/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10204/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10205/// return the default SDValue.
10206SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10207 SelectionDAG &DAG) const {
10208 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10209 // to v16i8. Peek through the bitcasts to get the actual operands.
10212
10213 auto ShuffleMask = SVN->getMask();
10214 SDValue VecShuffle(SVN, 0);
10215 SDLoc DL(SVN);
10216
10217 // Check that we have a four byte shuffle.
10218 if (!isNByteElemShuffleMask(SVN, 4, 1))
10219 return SDValue();
10220
10221 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10222 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10223 std::swap(LHS, RHS);
10225 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10226 if (!CommutedSV)
10227 return SDValue();
10228 ShuffleMask = CommutedSV->getMask();
10229 }
10230
10231 // Ensure that the RHS is a vector of constants.
10232 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10233 if (!BVN)
10234 return SDValue();
10235
10236 // Check if RHS is a splat of 4-bytes (or smaller).
10237 APInt APSplatValue, APSplatUndef;
10238 unsigned SplatBitSize;
10239 bool HasAnyUndefs;
10240 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10241 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10242 SplatBitSize > 32)
10243 return SDValue();
10244
10245 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10246 // The instruction splats a constant C into two words of the source vector
10247 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10248 // Thus we check that the shuffle mask is the equivalent of
10249 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10250 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10251 // within each word are consecutive, so we only need to check the first byte.
10252 SDValue Index;
10253 bool IsLE = Subtarget.isLittleEndian();
10254 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10255 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10256 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10257 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10258 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10259 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10260 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10261 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10262 else
10263 return SDValue();
10264
10265 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10266 // for XXSPLTI32DX.
10267 unsigned SplatVal = APSplatValue.getZExtValue();
10268 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10269 SplatVal |= (SplatVal << SplatBitSize);
10270
10271 SDValue SplatNode = DAG.getNode(
10272 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10273 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10274 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10275}
10276
10277/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10278/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10279/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10280/// i.e (or (shl x, C1), (srl x, 128-C1)).
10281SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10282 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10283 assert(Op.getValueType() == MVT::v1i128 &&
10284 "Only set v1i128 as custom, other type shouldn't reach here!");
10285 SDLoc dl(Op);
10286 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10287 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10288 unsigned SHLAmt = N1.getConstantOperandVal(0);
10289 if (SHLAmt % 8 == 0) {
10290 std::array<int, 16> Mask;
10291 std::iota(Mask.begin(), Mask.end(), 0);
10292 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10293 if (SDValue Shuffle =
10294 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10295 DAG.getUNDEF(MVT::v16i8), Mask))
10296 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10297 }
10298 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10299 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10300 DAG.getConstant(SHLAmt, dl, MVT::i32));
10301 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10302 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10303 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10304 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10305}
10306
10307/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10308/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10309/// return the code it can be lowered into. Worst case, it can always be
10310/// lowered into a vperm.
10311SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10312 SelectionDAG &DAG) const {
10313 SDLoc dl(Op);
10314 SDValue V1 = Op.getOperand(0);
10315 SDValue V2 = Op.getOperand(1);
10316 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10317
10318 // Any nodes that were combined in the target-independent combiner prior
10319 // to vector legalization will not be sent to the target combine. Try to
10320 // combine it here.
10321 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10322 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10323 return NewShuffle;
10324 Op = NewShuffle;
10326 V1 = Op.getOperand(0);
10327 V2 = Op.getOperand(1);
10328 }
10329 EVT VT = Op.getValueType();
10330 bool isLittleEndian = Subtarget.isLittleEndian();
10331
10332 unsigned ShiftElts, InsertAtByte;
10333 bool Swap = false;
10334
10335 // If this is a load-and-splat, we can do that with a single instruction
10336 // in some cases. However if the load has multiple uses, we don't want to
10337 // combine it because that will just produce multiple loads.
10338 bool IsPermutedLoad = false;
10339 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10340 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10341 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10342 InputLoad->hasOneUse()) {
10343 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10344 int SplatIdx =
10345 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10346
10347 // The splat index for permuted loads will be in the left half of the vector
10348 // which is strictly wider than the loaded value by 8 bytes. So we need to
10349 // adjust the splat index to point to the correct address in memory.
10350 if (IsPermutedLoad) {
10351 assert((isLittleEndian || IsFourByte) &&
10352 "Unexpected size for permuted load on big endian target");
10353 SplatIdx += IsFourByte ? 2 : 1;
10354 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10355 "Splat of a value outside of the loaded memory");
10356 }
10357
10358 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10359 // For 4-byte load-and-splat, we need Power9.
10360 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10361 uint64_t Offset = 0;
10362 if (IsFourByte)
10363 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10364 else
10365 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10366
10367 // If the width of the load is the same as the width of the splat,
10368 // loading with an offset would load the wrong memory.
10369 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10370 Offset = 0;
10371
10372 SDValue BasePtr = LD->getBasePtr();
10373 if (Offset != 0)
10375 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10376 SDValue Ops[] = {
10377 LD->getChain(), // Chain
10378 BasePtr, // BasePtr
10379 DAG.getValueType(Op.getValueType()) // VT
10380 };
10381 SDVTList VTL =
10382 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10383 SDValue LdSplt =
10384 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10385 Ops, LD->getMemoryVT(), LD->getMemOperand());
10386 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10387 if (LdSplt.getValueType() != SVOp->getValueType(0))
10388 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10389 return LdSplt;
10390 }
10391 }
10392
10393 // All v2i64 and v2f64 shuffles are legal
10394 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10395 return Op;
10396
10397 if (Subtarget.hasP9Vector() &&
10398 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10399 isLittleEndian)) {
10400 if (V2.isUndef())
10401 V2 = V1;
10402 else if (Swap)
10403 std::swap(V1, V2);
10404 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10405 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10406 if (ShiftElts) {
10407 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10408 DAG.getConstant(ShiftElts, dl, MVT::i32));
10409 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10410 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10411 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10412 }
10413 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10414 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10415 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10416 }
10417
10418 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10419 SDValue SplatInsertNode;
10420 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10421 return SplatInsertNode;
10422 }
10423
10424 if (Subtarget.hasP9Altivec()) {
10425 SDValue NewISDNode;
10426 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10427 return NewISDNode;
10428
10429 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10430 return NewISDNode;
10431 }
10432
10433 if (Subtarget.hasVSX() &&
10434 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10435 if (Swap)
10436 std::swap(V1, V2);
10437 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10438 SDValue Conv2 =
10439 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10440
10441 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10442 DAG.getConstant(ShiftElts, dl, MVT::i32));
10443 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10444 }
10445
10446 if (Subtarget.hasVSX() &&
10447 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10448 if (Swap)
10449 std::swap(V1, V2);
10450 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10451 SDValue Conv2 =
10452 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10453
10454 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10455 DAG.getConstant(ShiftElts, dl, MVT::i32));
10456 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10457 }
10458
10459 if (Subtarget.hasP9Vector()) {
10460 if (PPC::isXXBRHShuffleMask(SVOp)) {
10461 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10462 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10463 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10464 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10465 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10466 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10467 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10468 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10469 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10470 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10471 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10472 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10473 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10474 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10475 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10476 }
10477 }
10478
10479 if (Subtarget.hasVSX()) {
10480 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10481 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10482
10483 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10484 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10485 DAG.getConstant(SplatIdx, dl, MVT::i32));
10486 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10487 }
10488
10489 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10490 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10491 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10492 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10493 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10494 }
10495 }
10496
10497 // Cases that are handled by instructions that take permute immediates
10498 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10499 // selected by the instruction selector.
10500 if (V2.isUndef()) {
10501 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10502 PPC::isSplatShuffleMask(SVOp, 2) ||
10503 PPC::isSplatShuffleMask(SVOp, 4) ||
10504 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10505 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10506 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10507 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10508 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10509 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10510 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10511 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10512 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10513 (Subtarget.hasP8Altivec() && (
10514 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10515 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10516 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10517 return Op;
10518 }
10519 }
10520
10521 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10522 // and produce a fixed permutation. If any of these match, do not lower to
10523 // VPERM.
10524 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10525 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10526 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10527 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10528 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10529 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10530 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10531 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10532 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10533 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10534 (Subtarget.hasP8Altivec() && (
10535 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10536 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10537 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10538 return Op;
10539
10540 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10541 // perfect shuffle table to emit an optimal matching sequence.
10542 ArrayRef<int> PermMask = SVOp->getMask();
10543
10544 if (!DisablePerfectShuffle && !isLittleEndian) {
10545 unsigned PFIndexes[4];
10546 bool isFourElementShuffle = true;
10547 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10548 ++i) { // Element number
10549 unsigned EltNo = 8; // Start out undef.
10550 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10551 if (PermMask[i * 4 + j] < 0)
10552 continue; // Undef, ignore it.
10553
10554 unsigned ByteSource = PermMask[i * 4 + j];
10555 if ((ByteSource & 3) != j) {
10556 isFourElementShuffle = false;
10557 break;
10558 }
10559
10560 if (EltNo == 8) {
10561 EltNo = ByteSource / 4;
10562 } else if (EltNo != ByteSource / 4) {
10563 isFourElementShuffle = false;
10564 break;
10565 }
10566 }
10567 PFIndexes[i] = EltNo;
10568 }
10569
10570 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10571 // perfect shuffle vector to determine if it is cost effective to do this as
10572 // discrete instructions, or whether we should use a vperm.
10573 // For now, we skip this for little endian until such time as we have a
10574 // little-endian perfect shuffle table.
10575 if (isFourElementShuffle) {
10576 // Compute the index in the perfect shuffle table.
10577 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10578 PFIndexes[2] * 9 + PFIndexes[3];
10579
10580 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10581 unsigned Cost = (PFEntry >> 30);
10582
10583 // Determining when to avoid vperm is tricky. Many things affect the cost
10584 // of vperm, particularly how many times the perm mask needs to be
10585 // computed. For example, if the perm mask can be hoisted out of a loop or
10586 // is already used (perhaps because there are multiple permutes with the
10587 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10588 // permute mask out of the loop requires an extra register.
10589 //
10590 // As a compromise, we only emit discrete instructions if the shuffle can
10591 // be generated in 3 or fewer operations. When we have loop information
10592 // available, if this block is within a loop, we should avoid using vperm
10593 // for 3-operation perms and use a constant pool load instead.
10594 if (Cost < 3)
10595 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10596 }
10597 }
10598
10599 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10600 // vector that will get spilled to the constant pool.
10601 if (V2.isUndef()) V2 = V1;
10602
10603 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10604}
10605
// Lower a vector shuffle to a VPERM (or, on Power9 with VSX, XXPERM) node.
// PermMask is the shuffle mask in input-element units; it is converted to a
// 16-entry byte-granular permute control vector. XXSWAPD nodes feeding either
// input are folded into the mask, and on little-endian targets the operands
// are reversed and the mask complemented w.r.t. 31 to match the
// big-endian-defined vperm semantics.
SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
                                      ArrayRef<int> PermMask, EVT VT,
                                      SDValue V1, SDValue V2) const {
  unsigned Opcode = PPCISD::VPERM;
  // Remember the original type so the result can be bitcast back at the end.
  EVT ValType = V1.getValueType();
  SDLoc dl(Op);
  bool NeedSwap = false;
  bool isLittleEndian = Subtarget.isLittleEndian();
  bool isPPC64 = Subtarget.isPPC64();

  // Prefer XXPERM when at least one input is single-use: XXPERM clobbers its
  // second source, so feeding it a dead value avoids a copy.
  if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
      (V1->hasOneUse() || V2->hasOneUse())) {
    LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
                         "XXPERM instead\n");
    Opcode = PPCISD::XXPERM;

    // The second input to XXPERM is also an output so if the second input has
    // multiple uses then copying is necessary, as a result we want the
    // single-use operand to be used as the second input to prevent copying.
    if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
        (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
      std::swap(V1, V2);
      NeedSwap = !NeedSwap;
    }
  }

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes. Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31. This is
  // necessary to produce proper semantics with the big-endian-based vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits() / 8;

  // Detect an XXSWAPD feeding each input (looked through one wrapper node).
  // NOTE(review): this assumes V1 and V2 each have at least one operand to
  // inspect — confirm callers never pass operand-less nodes (e.g. a bare
  // register or constant) into this path.
  bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
  bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;

  /*
      How the byte indices below are adjusted when folding swaps into the
      mask. Vectors will be appended like so: [ V1 | V2 ]

      XXSWAPD on V1:
        [ A | B | C | D ] -> [ C | D | A | B ]
        i.e. index of A, B += 8, and index of C, D -= 8.
      XXSWAPD on V2:
        [ E | F | G | H ] -> [ G | H | E | F ]
        i.e. index of E, F += 8, index of G, H -= 8.
      Swap V1 and V2:
        [ V1 | V2 ] -> [ V2 | V1 ]
        i.e. index of V1 += 16, index of V2 -= 16.
  */

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    // Undef mask entries (-1) are arbitrarily mapped to element 0.
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    // Fold a V1-side swap: doubleword halves of elements 0-15 exchange.
    if (V1HasXXSWAPD) {
      if (SrcElt < 8)
        SrcElt += 8;
      else if (SrcElt < 16)
        SrcElt -= 8;
    }
    // Fold a V2-side swap: doubleword halves of elements 16-31 exchange.
    if (V2HasXXSWAPD) {
      if (SrcElt > 23)
        SrcElt -= 8;
      else if (SrcElt > 15)
        SrcElt += 8;
    }
    // Fold the operand swap performed for XXPERM above.
    if (NeedSwap) {
      if (SrcElt < 16)
        SrcElt += 16;
      else
        SrcElt -= 16;
    }
    // Expand the element index into per-byte indices; on LE complement each
    // byte index with respect to 31 for the big-endian-defined vperm.
    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(
            DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
      else
        ResultMask.push_back(
            DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
  }

  // The swaps are now encoded in the mask, so permute the pre-swap sources
  // directly (operand 1 of the XXSWAPD wrapper).
  if (V1HasXXSWAPD) {
    dl = SDLoc(V1->getOperand(0));
    V1 = V1->getOperand(0)->getOperand(1);
  }
  if (V2HasXXSWAPD) {
    dl = SDLoc(V2->getOperand(0));
    V2 = V2->getOperand(0)->getOperand(1);
  }

  // XXSWAPD operates on v2f64; normalize the operand types when a swap was
  // folded away on 64-bit targets.
  if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
    if (ValType != MVT::v2f64)
      V1 = DAG.getBitcast(MVT::v2f64, V1);
    if (V2.getValueType() != MVT::v2f64)
      V2 = DAG.getBitcast(MVT::v2f64, V2);
  }

  ShufflesHandledWithVPERM++;
  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
  LLVM_DEBUG({
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
    if (Opcode == PPCISD::XXPERM) {
      dbgs() << "Emitting a XXPERM for the following shuffle:\n";
    } else {
      dbgs() << "Emitting a VPERM for the following shuffle:\n";
    }
    SVOp->dump();
    dbgs() << "With the following permute control vector:\n";
    VPermMask.dump();
  });

  // XXPERM takes its permute control vector as v4i32.
  if (Opcode == PPCISD::XXPERM)
    VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);

  // Only need to place items backwards in LE,
  // the mask was properly calculated.
  if (isLittleEndian)
    std::swap(V1, V2);

  SDValue VPERMNode =
      DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);

  // Restore the caller-visible value type.
  VPERMNode = DAG.getBitcast(ValType, VPERMNode);
  return VPERMNode;
}
10736
10737/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10738/// vector comparison. If it is, return true and fill in Opc/isDot with
10739/// information about the intrinsic.
10740static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10741 bool &isDot, const PPCSubtarget &Subtarget) {
10742 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10743 CompareOpc = -1;
10744 isDot = false;
10745 switch (IntrinsicID) {
10746 default:
10747 return false;
10748 // Comparison predicates.
10749 case Intrinsic::ppc_altivec_vcmpbfp_p:
10750 CompareOpc = 966;
10751 isDot = true;
10752 break;
10753 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10754 CompareOpc = 198;
10755 isDot = true;
10756 break;
10757 case Intrinsic::ppc_altivec_vcmpequb_p:
10758 CompareOpc = 6;
10759 isDot = true;
10760 break;
10761 case Intrinsic::ppc_altivec_vcmpequh_p:
10762 CompareOpc = 70;
10763 isDot = true;
10764 break;
10765 case Intrinsic::ppc_altivec_vcmpequw_p:
10766 CompareOpc = 134;
10767 isDot = true;
10768 break;
10769 case Intrinsic::ppc_altivec_vcmpequd_p:
10770 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10771 CompareOpc = 199;
10772 isDot = true;
10773 } else
10774 return false;
10775 break;
10776 case Intrinsic::ppc_altivec_vcmpneb_p:
10777 case Intrinsic::ppc_altivec_vcmpneh_p:
10778 case Intrinsic::ppc_altivec_vcmpnew_p:
10779 case Intrinsic::ppc_altivec_vcmpnezb_p:
10780 case Intrinsic::ppc_altivec_vcmpnezh_p:
10781 case Intrinsic::ppc_altivec_vcmpnezw_p:
10782 if (Subtarget.hasP9Altivec()) {
10783 switch (IntrinsicID) {
10784 default:
10785 llvm_unreachable("Unknown comparison intrinsic.");
10786 case Intrinsic::ppc_altivec_vcmpneb_p:
10787 CompareOpc = 7;
10788 break;
10789 case Intrinsic::ppc_altivec_vcmpneh_p:
10790 CompareOpc = 71;
10791 break;
10792 case Intrinsic::ppc_altivec_vcmpnew_p:
10793 CompareOpc = 135;
10794 break;
10795 case Intrinsic::ppc_altivec_vcmpnezb_p:
10796 CompareOpc = 263;
10797 break;
10798 case Intrinsic::ppc_altivec_vcmpnezh_p:
10799 CompareOpc = 327;
10800 break;
10801 case Intrinsic::ppc_altivec_vcmpnezw_p:
10802 CompareOpc = 391;
10803 break;
10804 }
10805 isDot = true;
10806 } else
10807 return false;
10808 break;
10809 case Intrinsic::ppc_altivec_vcmpgefp_p:
10810 CompareOpc = 454;
10811 isDot = true;
10812 break;
10813 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10814 CompareOpc = 710;
10815 isDot = true;
10816 break;
10817 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10818 CompareOpc = 774;
10819 isDot = true;
10820 break;
10821 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10822 CompareOpc = 838;
10823 isDot = true;
10824 break;
10825 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10826 CompareOpc = 902;
10827 isDot = true;
10828 break;
10829 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10830 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10831 CompareOpc = 967;
10832 isDot = true;
10833 } else
10834 return false;
10835 break;
10836 case Intrinsic::ppc_altivec_vcmpgtub_p:
10837 CompareOpc = 518;
10838 isDot = true;
10839 break;
10840 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10841 CompareOpc = 582;
10842 isDot = true;
10843 break;
10844 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10845 CompareOpc = 646;
10846 isDot = true;
10847 break;
10848 case Intrinsic::ppc_altivec_vcmpgtud_p:
10849 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10850 CompareOpc = 711;
10851 isDot = true;
10852 } else
10853 return false;
10854 break;
10855
10856 case Intrinsic::ppc_altivec_vcmpequq:
10857 case Intrinsic::ppc_altivec_vcmpgtsq:
10858 case Intrinsic::ppc_altivec_vcmpgtuq:
10859 if (!Subtarget.isISA3_1())
10860 return false;
10861 switch (IntrinsicID) {
10862 default:
10863 llvm_unreachable("Unknown comparison intrinsic.");
10864 case Intrinsic::ppc_altivec_vcmpequq:
10865 CompareOpc = 455;
10866 break;
10867 case Intrinsic::ppc_altivec_vcmpgtsq:
10868 CompareOpc = 903;
10869 break;
10870 case Intrinsic::ppc_altivec_vcmpgtuq:
10871 CompareOpc = 647;
10872 break;
10873 }
10874 break;
10875
10876 // VSX predicate comparisons use the same infrastructure
10877 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10878 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10879 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10880 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10881 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10882 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10883 if (Subtarget.hasVSX()) {
10884 switch (IntrinsicID) {
10885 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10886 CompareOpc = 99;
10887 break;
10888 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10889 CompareOpc = 115;
10890 break;
10891 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10892 CompareOpc = 107;
10893 break;
10894 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10895 CompareOpc = 67;
10896 break;
10897 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10898 CompareOpc = 83;
10899 break;
10900 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10901 CompareOpc = 75;
10902 break;
10903 }
10904 isDot = true;
10905 } else
10906 return false;
10907 break;
10908
10909 // Normal Comparisons.
10910 case Intrinsic::ppc_altivec_vcmpbfp:
10911 CompareOpc = 966;
10912 break;
10913 case Intrinsic::ppc_altivec_vcmpeqfp:
10914 CompareOpc = 198;
10915 break;
10916 case Intrinsic::ppc_altivec_vcmpequb:
10917 CompareOpc = 6;
10918 break;
10919 case Intrinsic::ppc_altivec_vcmpequh:
10920 CompareOpc = 70;
10921 break;
10922 case Intrinsic::ppc_altivec_vcmpequw:
10923 CompareOpc = 134;
10924 break;
10925 case Intrinsic::ppc_altivec_vcmpequd:
10926 if (Subtarget.hasP8Altivec())
10927 CompareOpc = 199;
10928 else
10929 return false;
10930 break;
10931 case Intrinsic::ppc_altivec_vcmpneb:
10932 case Intrinsic::ppc_altivec_vcmpneh:
10933 case Intrinsic::ppc_altivec_vcmpnew:
10934 case Intrinsic::ppc_altivec_vcmpnezb:
10935 case Intrinsic::ppc_altivec_vcmpnezh:
10936 case Intrinsic::ppc_altivec_vcmpnezw:
10937 if (Subtarget.hasP9Altivec())
10938 switch (IntrinsicID) {
10939 default:
10940 llvm_unreachable("Unknown comparison intrinsic.");
10941 case Intrinsic::ppc_altivec_vcmpneb:
10942 CompareOpc = 7;
10943 break;
10944 case Intrinsic::ppc_altivec_vcmpneh:
10945 CompareOpc = 71;
10946 break;
10947 case Intrinsic::ppc_altivec_vcmpnew:
10948 CompareOpc = 135;
10949 break;
10950 case Intrinsic::ppc_altivec_vcmpnezb:
10951 CompareOpc = 263;
10952 break;
10953 case Intrinsic::ppc_altivec_vcmpnezh:
10954 CompareOpc = 327;
10955 break;
10956 case Intrinsic::ppc_altivec_vcmpnezw:
10957 CompareOpc = 391;
10958 break;
10959 }
10960 else
10961 return false;
10962 break;
10963 case Intrinsic::ppc_altivec_vcmpgefp:
10964 CompareOpc = 454;
10965 break;
10966 case Intrinsic::ppc_altivec_vcmpgtfp:
10967 CompareOpc = 710;
10968 break;
10969 case Intrinsic::ppc_altivec_vcmpgtsb:
10970 CompareOpc = 774;
10971 break;
10972 case Intrinsic::ppc_altivec_vcmpgtsh:
10973 CompareOpc = 838;
10974 break;
10975 case Intrinsic::ppc_altivec_vcmpgtsw:
10976 CompareOpc = 902;
10977 break;
10978 case Intrinsic::ppc_altivec_vcmpgtsd:
10979 if (Subtarget.hasP8Altivec())
10980 CompareOpc = 967;
10981 else
10982 return false;
10983 break;
10984 case Intrinsic::ppc_altivec_vcmpgtub:
10985 CompareOpc = 518;
10986 break;
10987 case Intrinsic::ppc_altivec_vcmpgtuh:
10988 CompareOpc = 582;
10989 break;
10990 case Intrinsic::ppc_altivec_vcmpgtuw:
10991 CompareOpc = 646;
10992 break;
10993 case Intrinsic::ppc_altivec_vcmpgtud:
10994 if (Subtarget.hasP8Altivec())
10995 CompareOpc = 711;
10996 else
10997 return false;
10998 break;
10999 case Intrinsic::ppc_altivec_vcmpequq_p:
11000 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11001 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11002 if (!Subtarget.isISA3_1())
11003 return false;
11004 switch (IntrinsicID) {
11005 default:
11006 llvm_unreachable("Unknown comparison intrinsic.");
11007 case Intrinsic::ppc_altivec_vcmpequq_p:
11008 CompareOpc = 455;
11009 break;
11010 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11011 CompareOpc = 903;
11012 break;
11013 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11014 CompareOpc = 647;
11015 break;
11016 }
11017 isDot = true;
11018 break;
11019 }
11020 return true;
11021}
11022
11023/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11024/// lower, do it, otherwise return null.
11025SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11026 SelectionDAG &DAG) const {
11027 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11028
11029 SDLoc dl(Op);
11030 // Note: BCD instructions expect the immediate operand in vector form (v4i32),
11031 // but the builtin provides it as a scalar. To satisfy the instruction
11032 // encoding, we splat the scalar across all lanes using SPLAT_VECTOR.
11033 auto MapNodeWithSplatVector =
11034 [&](unsigned Opcode,
11035 std::initializer_list<SDValue> ExtraOps = {}) -> SDValue {
11036 SDValue SplatVal =
11037 DAG.getNode(ISD::SPLAT_VECTOR, dl, MVT::v4i32, Op.getOperand(2));
11038
11039 SmallVector<SDValue, 4> Ops{SplatVal, Op.getOperand(1)};
11040 Ops.append(ExtraOps.begin(), ExtraOps.end());
11041 return DAG.getNode(Opcode, dl, MVT::v16i8, Ops);
11042 };
11043
11044 switch (IntrinsicID) {
11045 case Intrinsic::thread_pointer:
11046 // Reads the thread pointer register, used for __builtin_thread_pointer.
11047 if (Subtarget.isPPC64())
11048 return DAG.getRegister(PPC::X13, MVT::i64);
11049 return DAG.getRegister(PPC::R2, MVT::i32);
11050
11051 case Intrinsic::ppc_rldimi: {
11052 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11053 SDValue Src = Op.getOperand(1);
11054 APInt Mask = Op.getConstantOperandAPInt(4);
11055 if (Mask.isZero())
11056 return Op.getOperand(2);
11057 if (Mask.isAllOnes())
11058 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11059 uint64_t SH = Op.getConstantOperandVal(3);
11060 unsigned MB = 0, ME = 0;
11061 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11062 report_fatal_error("invalid rldimi mask!");
11063 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11064 if (ME < 63 - SH) {
11065 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11066 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11067 } else if (ME > 63 - SH) {
11068 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11069 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11070 }
11071 return SDValue(
11072 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11073 {Op.getOperand(2), Src,
11074 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11075 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11076 0);
11077 }
11078
11079 case Intrinsic::ppc_rlwimi: {
11080 APInt Mask = Op.getConstantOperandAPInt(4);
11081 if (Mask.isZero())
11082 return Op.getOperand(2);
11083 if (Mask.isAllOnes())
11084 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11085 Op.getOperand(3));
11086 unsigned MB = 0, ME = 0;
11087 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11088 report_fatal_error("invalid rlwimi mask!");
11089 return SDValue(DAG.getMachineNode(
11090 PPC::RLWIMI, dl, MVT::i32,
11091 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11092 DAG.getTargetConstant(MB, dl, MVT::i32),
11093 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11094 0);
11095 }
11096
11097 case Intrinsic::ppc_bcdshift:
11098 return MapNodeWithSplatVector(PPCISD::BCDSHIFT, {Op.getOperand(3)});
11099 case Intrinsic::ppc_bcdshiftround:
11100 return MapNodeWithSplatVector(PPCISD::BCDSHIFTROUND, {Op.getOperand(3)});
11101 case Intrinsic::ppc_bcdtruncate:
11102 return MapNodeWithSplatVector(PPCISD::BCDTRUNC, {Op.getOperand(3)});
11103 case Intrinsic::ppc_bcdunsignedtruncate:
11104 return MapNodeWithSplatVector(PPCISD::BCDUTRUNC);
11105 case Intrinsic::ppc_bcdunsignedshift:
11106 return MapNodeWithSplatVector(PPCISD::BCDUSHIFT);
11107
11108 case Intrinsic::ppc_rlwnm: {
11109 if (Op.getConstantOperandVal(3) == 0)
11110 return DAG.getConstant(0, dl, MVT::i32);
11111 unsigned MB = 0, ME = 0;
11112 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11113 report_fatal_error("invalid rlwnm mask!");
11114 return SDValue(
11115 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11116 {Op.getOperand(1), Op.getOperand(2),
11117 DAG.getTargetConstant(MB, dl, MVT::i32),
11118 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11119 0);
11120 }
11121
11122 case Intrinsic::ppc_mma_disassemble_acc: {
11123 if (Subtarget.isISAFuture()) {
11124 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11125 SDValue WideVec =
11126 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11127 Op.getOperand(1)),
11128 0);
11130 SDValue Value = SDValue(WideVec.getNode(), 0);
11131 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11132
11133 SDValue Extract;
11134 Extract = DAG.getNode(
11135 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11136 Subtarget.isLittleEndian() ? Value2 : Value,
11137 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11138 dl, getPointerTy(DAG.getDataLayout())));
11139 RetOps.push_back(Extract);
11140 Extract = DAG.getNode(
11141 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11142 Subtarget.isLittleEndian() ? Value2 : Value,
11143 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11144 dl, getPointerTy(DAG.getDataLayout())));
11145 RetOps.push_back(Extract);
11146 Extract = DAG.getNode(
11147 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11148 Subtarget.isLittleEndian() ? Value : Value2,
11149 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11150 dl, getPointerTy(DAG.getDataLayout())));
11151 RetOps.push_back(Extract);
11152 Extract = DAG.getNode(
11153 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11154 Subtarget.isLittleEndian() ? Value : Value2,
11155 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11156 dl, getPointerTy(DAG.getDataLayout())));
11157 RetOps.push_back(Extract);
11158 return DAG.getMergeValues(RetOps, dl);
11159 }
11160 [[fallthrough]];
11161 }
11162 case Intrinsic::ppc_vsx_disassemble_pair: {
11163 int NumVecs = 2;
11164 SDValue WideVec = Op.getOperand(1);
11165 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11166 NumVecs = 4;
11167 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11168 }
11170 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11171 SDValue Extract = DAG.getNode(
11172 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11173 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11174 : VecNo,
11175 dl, getPointerTy(DAG.getDataLayout())));
11176 RetOps.push_back(Extract);
11177 }
11178 return DAG.getMergeValues(RetOps, dl);
11179 }
11180
11181 case Intrinsic::ppc_build_dmr: {
11184 for (int i = 1; i < 9; i += 2) {
11185 SDValue Hi = Op.getOperand(i);
11186 SDValue Lo = Op.getOperand(i + 1);
11187 if (Hi->getOpcode() == ISD::LOAD)
11188 Chains.push_back(Hi.getValue(1));
11189 if (Lo->getOpcode() == ISD::LOAD)
11190 Chains.push_back(Lo.getValue(1));
11191 Pairs.push_back(
11192 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11193 }
11194 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11195 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11196 return DAG.getMergeValues({Value, TF}, dl);
11197 }
11198
11199 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11200 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11201 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11202 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11203 "Specify P of 0 or 1 for lower or upper 512 bytes");
11204 unsigned HiLo = Idx->getSExtValue();
11205 unsigned Opcode;
11206 unsigned Subx;
11207 if (HiLo == 0) {
11208 Opcode = PPC::DMXXEXTFDMR512;
11209 Subx = PPC::sub_wacc_lo;
11210 } else {
11211 Opcode = PPC::DMXXEXTFDMR512_HI;
11212 Subx = PPC::sub_wacc_hi;
11213 }
11214 SDValue Subreg(
11215 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11216 Op.getOperand(1),
11217 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11218 0);
11219 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11220 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11221 }
11222
11223 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11224 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11225 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11226 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11227 "Specify a dmr row pair 0-3");
11228 unsigned IdxVal = Idx->getSExtValue();
11229 unsigned Subx;
11230 switch (IdxVal) {
11231 case 0:
11232 Subx = PPC::sub_dmrrowp0;
11233 break;
11234 case 1:
11235 Subx = PPC::sub_dmrrowp1;
11236 break;
11237 case 2:
11238 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11239 break;
11240 case 3:
11241 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11242 break;
11243 }
11244 SDValue Subreg(
11245 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11246 Op.getOperand(1),
11247 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11248 0);
11249 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11250 return SDValue(
11251 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11252 0);
11253 }
11254
11255 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11256 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11257 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11258 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11259 "Specify P of 0 or 1 for lower or upper 512 bytes");
11260 unsigned HiLo = Idx->getSExtValue();
11261 unsigned Opcode;
11262 unsigned Subx;
11263 if (HiLo == 0) {
11264 Opcode = PPCISD::INST512;
11265 Subx = PPC::sub_wacc_lo;
11266 } else {
11267 Opcode = PPCISD::INST512HI;
11268 Subx = PPC::sub_wacc_hi;
11269 }
11270 SDValue Wacc = DAG.getNode(Opcode, dl, MVT::v512i1, Op.getOperand(2),
11271 Op.getOperand(3));
11272 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11273 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11274 Op.getOperand(1), Wacc, SubReg),
11275 0);
11276 }
11277
11278 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11279 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11280 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11281 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11282 "Specify a dmr row pair 0-3");
11283 unsigned IdxVal = Idx->getSExtValue();
11284 unsigned Subx;
11285 switch (IdxVal) {
11286 case 0:
11287 Subx = PPC::sub_dmrrowp0;
11288 break;
11289 case 1:
11290 Subx = PPC::sub_dmrrowp1;
11291 break;
11292 case 2:
11293 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11294 break;
11295 case 3:
11296 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11297 break;
11298 }
11299 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11300 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11301 SDValue DMRRowp =
11302 DAG.getNode(PPCISD::INST256, dl, MVT::v256i1, Op.getOperand(2), P);
11303 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11304 Op.getOperand(1), DMRRowp, SubReg),
11305 0);
11306 }
11307
11308 case Intrinsic::ppc_mma_xxmfacc:
11309 case Intrinsic::ppc_mma_xxmtacc: {
11310 // Allow pre-isa-future subtargets to lower as normal.
11311 if (!Subtarget.isISAFuture())
11312 return SDValue();
11313 // The intrinsics for xxmtacc and xxmfacc take one argument of
11314 // type v512i1, for future cpu the corresponding wacc instruction
11315 // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
11316 // the need to produce the xxm[t|f]acc.
11317 SDValue WideVec = Op.getOperand(1);
11318 DAG.ReplaceAllUsesWith(Op, WideVec);
11319 return SDValue();
11320 }
11321
11322 case Intrinsic::ppc_unpack_longdouble: {
11323 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11324 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11325 "Argument of long double unpack must be 0 or 1!");
11326 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11327 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11328 Idx->getValueType(0)));
11329 }
11330
11331 case Intrinsic::ppc_compare_exp_lt:
11332 case Intrinsic::ppc_compare_exp_gt:
11333 case Intrinsic::ppc_compare_exp_eq:
11334 case Intrinsic::ppc_compare_exp_uo: {
11335 unsigned Pred;
11336 switch (IntrinsicID) {
11337 case Intrinsic::ppc_compare_exp_lt:
11338 Pred = PPC::PRED_LT;
11339 break;
11340 case Intrinsic::ppc_compare_exp_gt:
11341 Pred = PPC::PRED_GT;
11342 break;
11343 case Intrinsic::ppc_compare_exp_eq:
11344 Pred = PPC::PRED_EQ;
11345 break;
11346 case Intrinsic::ppc_compare_exp_uo:
11347 Pred = PPC::PRED_UN;
11348 break;
11349 }
11350 return SDValue(
11351 DAG.getMachineNode(
11352 PPC::SELECT_CC_I4, dl, MVT::i32,
11353 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11354 Op.getOperand(1), Op.getOperand(2)),
11355 0),
11356 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11357 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11358 0);
11359 }
11360 case Intrinsic::ppc_test_data_class: {
11361 EVT OpVT = Op.getOperand(1).getValueType();
11362 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11363 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11364 : PPC::XSTSTDCSP);
11365 // Lower __builtin_ppc_test_data_class(value, mask) to XSTSTDC* instruction.
11366 // The XSTSTDC* instructions test if a floating-point value matches any of
11367 // the data classes specified in the mask, setting CR field bits
11368 // accordingly. We need to extract the EQ bit (bit 2) from the CR field and
11369 // convert it to an integer result (1 if match, 0 if no match).
11370 //
11371 // Note: Operands are swapped because XSTSTDC* expects (mask, value) but the
11372 // intrinsic provides (value, mask) as Op.getOperand(1) and
11373 // Op.getOperand(2).
11374 SDValue TestDataClass =
11375 SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32,
11376 {Op.getOperand(2), Op.getOperand(1)}),
11377 0);
11378 if (Subtarget.isISA3_1()) {
11379 // ISA 3.1+: Use SETBC instruction to directly convert CR bit to integer.
11380 // This is more efficient than the SELECT_CC approach used in earlier
11381 // ISAs.
11382 SDValue SubRegIdx = DAG.getTargetConstant(PPC::sub_eq, dl, MVT::i32);
11383 SDValue CRBit =
11384 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11385 TestDataClass, SubRegIdx),
11386 0);
11387
11388 return DAG.getNode(PPCISD::SETBC, dl, MVT::i32, CRBit);
11389 }
11390
11391 // Pre-ISA 3.1: Use SELECT_CC to convert CR field to integer (1 or 0).
11392 return SDValue(
11393 DAG.getMachineNode(PPC::SELECT_CC_I4, dl, MVT::i32,
11394 {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
11395 DAG.getConstant(0, dl, MVT::i32),
11396 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11397 0);
11398 }
11399 case Intrinsic::ppc_fnmsub: {
11400 EVT VT = Op.getOperand(1).getValueType();
11401 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11402 return DAG.getNode(
11403 ISD::FNEG, dl, VT,
11404 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11405 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11406 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11407 Op.getOperand(2), Op.getOperand(3));
11408 }
11409 case Intrinsic::ppc_convert_f128_to_ppcf128:
11410 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11411 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11412 ? RTLIB::CONVERT_PPCF128_F128
11413 : RTLIB::CONVERT_F128_PPCF128;
11414 MakeLibCallOptions CallOptions;
11415 std::pair<SDValue, SDValue> Result =
11416 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11417 dl, SDValue());
11418 return Result.first;
11419 }
11420 case Intrinsic::ppc_maxfe:
11421 case Intrinsic::ppc_maxfl:
11422 case Intrinsic::ppc_maxfs:
11423 case Intrinsic::ppc_minfe:
11424 case Intrinsic::ppc_minfl:
11425 case Intrinsic::ppc_minfs: {
11426 EVT VT = Op.getValueType();
11427 assert(
11428 all_of(Op->ops().drop_front(4),
11429 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11430 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11431 (void)VT;
11433 if (IntrinsicID == Intrinsic::ppc_minfe ||
11434 IntrinsicID == Intrinsic::ppc_minfl ||
11435 IntrinsicID == Intrinsic::ppc_minfs)
11436 CC = ISD::SETLT;
11437 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11438 SDValue Res = Op.getOperand(I);
11439 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11440 Res =
11441 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11442 }
11443 return Res;
11444 }
11445 }
11446
11447 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11448 // opcode number of the comparison.
11449 int CompareOpc;
11450 bool isDot;
11451 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11452 return SDValue(); // Don't custom lower most intrinsics.
11453
11454 // If this is a non-dot comparison, make the VCMP node and we are done.
11455 if (!isDot) {
11456 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11457 Op.getOperand(1), Op.getOperand(2),
11458 DAG.getConstant(CompareOpc, dl, MVT::i32));
11459 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11460 }
11461
11462 // Create the PPCISD altivec 'dot' comparison node.
11463 SDValue Ops[] = {
11464 Op.getOperand(2), // LHS
11465 Op.getOperand(3), // RHS
11466 DAG.getConstant(CompareOpc, dl, MVT::i32)
11467 };
11468 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11469 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11470
11471 // Unpack the result based on how the target uses it.
11472 unsigned BitNo; // Bit # of CR6.
11473 bool InvertBit; // Invert result?
11474 unsigned Bitx;
11475 unsigned SetOp;
11476 switch (Op.getConstantOperandVal(1)) {
11477 default: // Can't happen, don't crash on invalid number though.
11478 case 0: // Return the value of the EQ bit of CR6.
11479 BitNo = 0;
11480 InvertBit = false;
11481 Bitx = PPC::sub_eq;
11482 SetOp = PPCISD::SETBC;
11483 break;
11484 case 1: // Return the inverted value of the EQ bit of CR6.
11485 BitNo = 0;
11486 InvertBit = true;
11487 Bitx = PPC::sub_eq;
11488 SetOp = PPCISD::SETBCR;
11489 break;
11490 case 2: // Return the value of the LT bit of CR6.
11491 BitNo = 2;
11492 InvertBit = false;
11493 Bitx = PPC::sub_lt;
11494 SetOp = PPCISD::SETBC;
11495 break;
11496 case 3: // Return the inverted value of the LT bit of CR6.
11497 BitNo = 2;
11498 InvertBit = true;
11499 Bitx = PPC::sub_lt;
11500 SetOp = PPCISD::SETBCR;
11501 break;
11502 }
11503
11504 SDValue GlueOp = CompNode.getValue(1);
11505 if (Subtarget.isISA3_1()) {
11506 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11507 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11508 SDValue CRBit =
11509 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11510 CR6Reg, SubRegIdx, GlueOp),
11511 0);
11512 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11513 }
11514
11515 // Now that we have the comparison, emit a copy from the CR to a GPR.
11516 // This is flagged to the above dot comparison.
11517 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11518 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11519
11520 // Shift the bit into the low position.
11521 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11522 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11523 // Isolate the bit.
11524 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11525 DAG.getConstant(1, dl, MVT::i32));
11526
11527 // If we are supposed to, toggle the bit.
11528 if (InvertBit)
11529 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11530 DAG.getConstant(1, dl, MVT::i32));
11531 return Flags;
11532}
11533
11534SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11535 SelectionDAG &DAG) const {
11536 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11537 // the beginning of the argument list.
11538 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11539 SDLoc DL(Op);
11540 switch (Op.getConstantOperandVal(ArgStart)) {
11541 case Intrinsic::ppc_cfence: {
11542 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11543 SDValue Val = Op.getOperand(ArgStart + 1);
11544 EVT Ty = Val.getValueType();
11545 if (Ty == MVT::i128) {
11546 // FIXME: Testing one of two paired registers is sufficient to guarantee
11547 // ordering?
11548 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11549 }
11550 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11551 return SDValue(
11552 DAG.getMachineNode(
11553 Opcode, DL, MVT::Other,
11554 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11555 Op.getOperand(0)),
11556 0);
11557 }
11558 case Intrinsic::ppc_disassemble_dmr: {
11559 assert(ArgStart == 1 &&
11560 "llvm.ppc.disassemble.dmr must carry a chain argument.");
11561 return DAG.getStore(Op.getOperand(0), DL, Op.getOperand(ArgStart + 2),
11562 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11563 }
11564 case Intrinsic::ppc_amo_stwat:
11565 case Intrinsic::ppc_amo_stdat: {
11566 SDLoc dl(Op);
11567 SDValue Chain = Op.getOperand(0);
11568 SDValue Ptr = Op.getOperand(ArgStart + 1);
11569 SDValue Val = Op.getOperand(ArgStart + 2);
11570 SDValue FC = Op.getOperand(ArgStart + 3);
11571
11572 return DAG.getNode(PPCISD::STAT, dl, MVT::Other, Chain, Val, Ptr, FC);
11573 }
11574 default:
11575 break;
11576 }
11577 return SDValue();
11578}
11579
11580// Lower scalar BSWAP64 to xxbrd.
11581SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11582 SDLoc dl(Op);
11583 if (!Subtarget.isPPC64())
11584 return Op;
11585 // MTVSRDD
11586 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11587 Op.getOperand(0));
11588 // XXBRD
11589 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11590 // MFVSRD
11591 int VectorIndex = 0;
11592 if (Subtarget.isLittleEndian())
11593 VectorIndex = 1;
11594 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11595 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11596 return Op;
11597}
11598
11599// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11600// compared to a value that is atomically loaded (atomic loads zero-extend).
11601SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11602 SelectionDAG &DAG) const {
11603 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11604 "Expecting an atomic compare-and-swap here.");
11605 SDLoc dl(Op);
11606 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11607 EVT MemVT = AtomicNode->getMemoryVT();
11608 if (MemVT.getSizeInBits() >= 32)
11609 return Op;
11610
11611 SDValue CmpOp = Op.getOperand(2);
11612 // If this is already correctly zero-extended, leave it alone.
11613 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11614 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11615 return Op;
11616
11617 // Clear the high bits of the compare operand.
11618 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11619 SDValue NewCmpOp =
11620 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11621 DAG.getConstant(MaskVal, dl, MVT::i32));
11622
11623 // Replace the existing compare operand with the properly zero-extended one.
11625 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11626 Ops.push_back(AtomicNode->getOperand(i));
11627 Ops[2] = NewCmpOp;
11628 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11629 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11630 auto NodeTy =
11631 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11632 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11633}
11634
11635SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11636 SelectionDAG &DAG) const {
11637 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11638 EVT MemVT = N->getMemoryVT();
11639 assert(MemVT.getSimpleVT() == MVT::i128 &&
11640 "Expect quadword atomic operations");
11641 SDLoc dl(N);
11642 unsigned Opc = N->getOpcode();
11643 switch (Opc) {
11644 case ISD::ATOMIC_LOAD: {
11645 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11646 // lowered to ppc instructions by pattern matching instruction selector.
11647 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11649 N->getOperand(0),
11650 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11651 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11652 Ops.push_back(N->getOperand(I));
11653 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11654 Ops, MemVT, N->getMemOperand());
11655 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11656 SDValue ValHi =
11657 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11658 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11659 DAG.getConstant(64, dl, MVT::i32));
11660 SDValue Val =
11661 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11662 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11663 {Val, LoadedVal.getValue(2)});
11664 }
11665 case ISD::ATOMIC_STORE: {
11666 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11667 // lowered to ppc instructions by pattern matching instruction selector.
11668 SDVTList Tys = DAG.getVTList(MVT::Other);
11670 N->getOperand(0),
11671 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11672 SDValue Val = N->getOperand(1);
11673 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11674 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11675 DAG.getConstant(64, dl, MVT::i32));
11676 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11677 Ops.push_back(ValLo);
11678 Ops.push_back(ValHi);
11679 Ops.push_back(N->getOperand(2));
11680 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11681 N->getMemOperand());
11682 }
11683 default:
11684 llvm_unreachable("Unexpected atomic opcode");
11685 }
11686}
11687
11689 SelectionDAG &DAG,
11690 const PPCSubtarget &Subtarget) {
11691 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11692
11693 enum DataClassMask {
11694 DC_NAN = 1 << 6,
11695 DC_NEG_INF = 1 << 4,
11696 DC_POS_INF = 1 << 5,
11697 DC_NEG_ZERO = 1 << 2,
11698 DC_POS_ZERO = 1 << 3,
11699 DC_NEG_SUBNORM = 1,
11700 DC_POS_SUBNORM = 1 << 1,
11701 };
11702
11703 EVT VT = Op.getValueType();
11704
11705 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11706 : VT == MVT::f64 ? PPC::XSTSTDCDP
11707 : PPC::XSTSTDCSP;
11708
11709 if (Mask == fcAllFlags)
11710 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11711 if (Mask == 0)
11712 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11713
11714 // When it's cheaper or necessary to test reverse flags.
11715 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11716 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11717 return DAG.getNOT(Dl, Rev, MVT::i1);
11718 }
11719
11720 // Power doesn't support testing whether a value is 'normal'. Test the rest
11721 // first, and test if it's 'not not-normal' with expected sign.
11722 if (Mask & fcNormal) {
11723 SDValue Rev(DAG.getMachineNode(
11724 TestOp, Dl, MVT::i32,
11725 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11726 DC_NEG_ZERO | DC_POS_ZERO |
11727 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11728 Dl, MVT::i32),
11729 Op),
11730 0);
11731 // Sign are stored in CR bit 0, result are in CR bit 2.
11732 SDValue Sign(
11733 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11734 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11735 0);
11736 SDValue Normal(DAG.getNOT(
11737 Dl,
11739 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11740 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11741 0),
11742 MVT::i1));
11743 if (Mask & fcPosNormal)
11744 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11745 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11746 if (Mask == fcPosNormal || Mask == fcNegNormal)
11747 return Result;
11748
11749 return DAG.getNode(
11750 ISD::OR, Dl, MVT::i1,
11751 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11752 }
11753
11754 // The instruction doesn't differentiate between signaling or quiet NaN. Test
11755 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11756 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11757 bool IsQuiet = Mask & fcQNan;
11758 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11759
11760 // Quietness is determined by the first bit in fraction field.
11761 uint64_t QuietMask = 0;
11762 SDValue HighWord;
11763 if (VT == MVT::f128) {
11764 HighWord = DAG.getNode(
11765 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11766 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11767 QuietMask = 0x8000;
11768 } else if (VT == MVT::f64) {
11769 if (Subtarget.isPPC64()) {
11770 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11771 DAG.getBitcast(MVT::i64, Op),
11772 DAG.getConstant(1, Dl, MVT::i32));
11773 } else {
11774 SDValue Vec = DAG.getBitcast(
11775 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11776 HighWord = DAG.getNode(
11777 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11778 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11779 }
11780 QuietMask = 0x80000;
11781 } else if (VT == MVT::f32) {
11782 HighWord = DAG.getBitcast(MVT::i32, Op);
11783 QuietMask = 0x400000;
11784 }
11785 SDValue NanRes = DAG.getSetCC(
11786 Dl, MVT::i1,
11787 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11788 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11789 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11790 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11791 if (Mask == fcQNan || Mask == fcSNan)
11792 return NanRes;
11793
11794 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11795 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11796 NanRes);
11797 }
11798
11799 unsigned NativeMask = 0;
11800 if ((Mask & fcNan) == fcNan)
11801 NativeMask |= DC_NAN;
11802 if (Mask & fcNegInf)
11803 NativeMask |= DC_NEG_INF;
11804 if (Mask & fcPosInf)
11805 NativeMask |= DC_POS_INF;
11806 if (Mask & fcNegZero)
11807 NativeMask |= DC_NEG_ZERO;
11808 if (Mask & fcPosZero)
11809 NativeMask |= DC_POS_ZERO;
11810 if (Mask & fcNegSubnormal)
11811 NativeMask |= DC_NEG_SUBNORM;
11812 if (Mask & fcPosSubnormal)
11813 NativeMask |= DC_POS_SUBNORM;
11814 return SDValue(
11815 DAG.getMachineNode(
11816 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11818 TestOp, Dl, MVT::i32,
11819 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11820 0),
11821 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11822 0);
11823}
11824
11825SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11826 SelectionDAG &DAG) const {
11827 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11828 SDValue LHS = Op.getOperand(0);
11829 uint64_t RHSC = Op.getConstantOperandVal(1);
11830 SDLoc Dl(Op);
11831 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11832 if (LHS.getValueType() == MVT::ppcf128) {
11833 // The higher part determines the value class.
11834 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11835 DAG.getConstant(1, Dl, MVT::i32));
11836 }
11837
11838 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11839}
11840
11841// Adjust the length value for a load/store with length to account for the
11842// instructions requiring a left justified length, and for non-byte element
11843// types requiring scaling by element size.
11844static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11845 SelectionDAG &DAG) {
11846 SDLoc dl(Val);
11847 EVT VT = Val->getValueType(0);
11848 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11849 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
11850 SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
11851 return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
11852}
11853
11854SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11855 auto VPLD = cast<VPLoadSDNode>(Op);
11856 bool Future = Subtarget.isISAFuture();
11857 SDLoc dl(Op);
11858 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
11859 "Mask predication not supported");
11860 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11861 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
11862 unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
11863 unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
11864 Len = AdjustLength(Len, EltBits, !Future, DAG);
11865 SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11866 VPLD->getOperand(1), Len};
11867 SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
11868 SDValue VPL =
11870 VPLD->getMemoryVT(), VPLD->getMemOperand());
11871 return VPL;
11872}
11873
11874SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
11875 auto VPST = cast<VPStoreSDNode>(Op);
11876 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
11877 "Mask predication not supported");
11878 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11879 SDLoc dl(Op);
11880 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
11881 unsigned EltBits =
11882 Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
11883 bool Future = Subtarget.isISAFuture();
11884 unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
11885 Len = AdjustLength(Len, EltBits, !Future, DAG);
11886 SDValue Ops[] = {
11887 VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11888 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
11889 VPST->getOperand(2), Len};
11890 SDVTList Tys = DAG.getVTList(MVT::Other);
11891 SDValue VPS =
11893 VPST->getMemoryVT(), VPST->getMemOperand());
11894 return VPS;
11895}
11896
11897SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11898 SelectionDAG &DAG) const {
11899 SDLoc dl(Op);
11900
11901 MachineFunction &MF = DAG.getMachineFunction();
11902 SDValue Op0 = Op.getOperand(0);
11903 EVT ValVT = Op0.getValueType();
11904 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11905 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11906 int64_t IntVal = Op.getConstantOperandVal(0);
11907 if (IntVal >= -16 && IntVal <= 15)
11908 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11909 dl);
11910 }
11911
11912 ReuseLoadInfo RLI;
11913 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11914 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11915 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11916 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11917
11918 MachineMemOperand *MMO =
11920 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11921 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11923 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11924 MVT::i32, MMO);
11925 if (RLI.ResChain)
11926 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11927 return Bits.getValue(0);
11928 }
11929
11930 // Create a stack slot that is 16-byte aligned.
11931 MachineFrameInfo &MFI = MF.getFrameInfo();
11932 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11933 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11934 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11935
11936 SDValue Val = Op0;
11937 // P10 hardware store forwarding requires that a single store contains all
11938 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11939 // to avoid load hit store on P10 when running binaries compiled for older
11940 // processors by generating two mergeable scalar stores to forward with the
11941 // vector load.
11942 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11943 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11944 ValVT.getSizeInBits() <= 64) {
11945 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11946 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11947 SDValue ShiftBy = DAG.getConstant(
11948 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11949 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11950 SDValue Plus8 =
11951 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11952 SDValue Store2 =
11953 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11954 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11955 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11956 MachinePointerInfo());
11957 }
11958
11959 // Store the input value into Value#0 of the stack slot.
11960 SDValue Store =
11961 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11962 // Load it out.
11963 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11964}
11965
11966SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11967 SelectionDAG &DAG) const {
11968 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11969 "Should only be called for ISD::INSERT_VECTOR_ELT");
11970
11971 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11972
11973 EVT VT = Op.getValueType();
11974 SDLoc dl(Op);
11975 SDValue V1 = Op.getOperand(0);
11976 SDValue V2 = Op.getOperand(1);
11977
11978 if (VT == MVT::v2f64 && C)
11979 return Op;
11980
11981 if (Subtarget.hasP9Vector()) {
11982 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11983 // because on P10, it allows this specific insert_vector_elt load pattern to
11984 // utilize the refactored load and store infrastructure in order to exploit
11985 // prefixed loads.
11986 // On targets with inexpensive direct moves (Power9 and up), a
11987 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11988 // load since a single precision load will involve conversion to double
11989 // precision on the load followed by another conversion to single precision.
11990 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11991 (isa<LoadSDNode>(V2))) {
11992 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11993 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11994 SDValue InsVecElt =
11995 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11996 BitcastLoad, Op.getOperand(2));
11997 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11998 }
11999 }
12000
12001 if (Subtarget.isISA3_1()) {
12002 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
12003 return SDValue();
12004 // On P10, we have legal lowering for constant and variable indices for
12005 // all vectors.
12006 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12007 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
12008 return Op;
12009 }
12010
12011 // Before P10, we have legal lowering for constant indices but not for
12012 // variable ones.
12013 if (!C)
12014 return SDValue();
12015
12016 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
12017 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
12018 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
12019 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
12020 unsigned InsertAtElement = C->getZExtValue();
12021 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
12022 if (Subtarget.isLittleEndian()) {
12023 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
12024 }
12025 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
12026 DAG.getConstant(InsertAtByte, dl, MVT::i32));
12027 }
12028 return Op;
12029}
12030
12031SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12032 SelectionDAG &DAG) const {
12033 SDLoc dl(Op);
12034 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12035 SDValue LoadChain = LN->getChain();
12036 SDValue BasePtr = LN->getBasePtr();
12037 EVT VT = Op.getValueType();
12038 bool IsV1024i1 = VT == MVT::v1024i1;
12039 bool IsV2048i1 = VT == MVT::v2048i1;
12040
12041 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12042 // Dense Math dmr pair registers, respectively.
12043 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12044 (void)IsV2048i1;
12045 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12046 "Dense Math support required.");
12047 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12048
12050 SmallVector<SDValue, 8> LoadChains;
12051
12052 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
12053 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12054 MachineMemOperand *MMO = LN->getMemOperand();
12055 unsigned NumVecs = VT.getSizeInBits() / 256;
12056 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12057 MachineMemOperand *NewMMO =
12058 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12059 if (Idx > 0) {
12060 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12061 DAG.getConstant(32, dl, BasePtr.getValueType()));
12062 LoadOps[2] = BasePtr;
12063 }
12065 DAG.getVTList(MVT::v256i1, MVT::Other),
12066 LoadOps, MVT::v256i1, NewMMO);
12067 LoadChains.push_back(Ld.getValue(1));
12068 Loads.push_back(Ld);
12069 }
12070
12071 if (Subtarget.isLittleEndian()) {
12072 std::reverse(Loads.begin(), Loads.end());
12073 std::reverse(LoadChains.begin(), LoadChains.end());
12074 }
12075
12076 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12077 SDValue Value = DMFInsert1024(Loads, dl, DAG);
12078
12079 if (IsV1024i1) {
12080 return DAG.getMergeValues({Value, TF}, dl);
12081 }
12082
12083 // Handle Loads for V2048i1 which represents a dmr pair.
12084 SmallVector<SDValue, 4> MoreLoads{Loads[4], Loads[5], Loads[6], Loads[7]};
12085 SDValue Dmr1Value = DMFInsert1024(MoreLoads, dl, DAG);
12086
12087 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12088 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12089
12090 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12091 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12092
12093 SDValue DmrPValue = SDValue(
12094 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12095
12096 return DAG.getMergeValues({DmrPValue, TF}, dl);
12097}
12098
12099SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12100 const SDLoc &dl,
12101 SelectionDAG &DAG) const {
12102 SDValue Lo =
12103 DAG.getNode(PPCISD::INST512, dl, MVT::v512i1, Pairs[0], Pairs[1]);
12104 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12105 SDValue Hi =
12106 DAG.getNode(PPCISD::INST512HI, dl, MVT::v512i1, Pairs[2], Pairs[3]);
12107 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12108 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12109
12110 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12111 {RC, Lo, LoSub, Hi, HiSub}),
12112 0);
12113}
12114
12115SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12116 SelectionDAG &DAG) const {
12117 SDLoc dl(Op);
12118 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12119 SDValue LoadChain = LN->getChain();
12120 SDValue BasePtr = LN->getBasePtr();
12121 EVT VT = Op.getValueType();
12122
12123 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12124 return LowerDMFVectorLoad(Op, DAG);
12125
12126 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12127 return Op;
12128
12129 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12130 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12131 "Type unsupported without MMA");
12132 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12133 "Type unsupported without paired vector support");
12134
12135 // For v256i1 on ISA Future, let the load go through to instruction selection
12136 // where it will be matched to lxvp/plxvp by the instruction patterns.
12137 if (VT == MVT::v256i1 && Subtarget.isISAFuture())
12138 return Op;
12139
12140 // For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
12141 // value in 2 or 4 vsx registers.
12142 Align Alignment = LN->getAlign();
12144 SmallVector<SDValue, 4> LoadChains;
12145 unsigned NumVecs = VT.getSizeInBits() / 128;
12146 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12147 SDValue Load =
12148 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12149 LN->getPointerInfo().getWithOffset(Idx * 16),
12150 commonAlignment(Alignment, Idx * 16),
12151 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12152 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12153 DAG.getConstant(16, dl, BasePtr.getValueType()));
12154 Loads.push_back(Load);
12155 LoadChains.push_back(Load.getValue(1));
12156 }
12157 if (Subtarget.isLittleEndian()) {
12158 std::reverse(Loads.begin(), Loads.end());
12159 std::reverse(LoadChains.begin(), LoadChains.end());
12160 }
12161 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12162 SDValue Value =
12163 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12164 dl, VT, Loads);
12165 SDValue RetOps[] = {Value, TF};
12166 return DAG.getMergeValues(RetOps, dl);
12167}
12168
12169SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12170 SelectionDAG &DAG) const {
12171
12172 SDLoc dl(Op);
12173 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12174 SDValue StoreChain = SN->getChain();
12175 SDValue BasePtr = SN->getBasePtr();
12178 EVT VT = SN->getValue().getValueType();
12179 bool IsV1024i1 = VT == MVT::v1024i1;
12180 bool IsV2048i1 = VT == MVT::v2048i1;
12181
12182 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12183 // Dense Math dmr pair registers, respectively.
12184 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12185 (void)IsV2048i1;
12186 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12187 "Dense Math support required.");
12188 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12189
12190 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12191 if (IsV1024i1) {
12193 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12194 Op.getOperand(1),
12195 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12196 0);
12198 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12199 Op.getOperand(1),
12200 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12201 0);
12202 MachineSDNode *ExtNode =
12203 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12204 Values.push_back(SDValue(ExtNode, 0));
12205 Values.push_back(SDValue(ExtNode, 1));
12206 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12207 Values.push_back(SDValue(ExtNode, 0));
12208 Values.push_back(SDValue(ExtNode, 1));
12209 } else {
12210 // This corresponds to v2048i1 which represents a dmr pair.
12211 SDValue Dmr0(
12212 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12213 Op.getOperand(1),
12214 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12215 0);
12216
12217 SDValue Dmr1(
12218 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12219 Op.getOperand(1),
12220 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12221 0);
12222
12223 SDValue Dmr0Lo(DAG.getMachineNode(
12224 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12225 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12226 0);
12227
12228 SDValue Dmr0Hi(DAG.getMachineNode(
12229 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12230 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12231 0);
12232
12233 SDValue Dmr1Lo(DAG.getMachineNode(
12234 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12235 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12236 0);
12237
12238 SDValue Dmr1Hi(DAG.getMachineNode(
12239 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12240 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12241 0);
12242
12243 MachineSDNode *ExtNode =
12244 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12245 Values.push_back(SDValue(ExtNode, 0));
12246 Values.push_back(SDValue(ExtNode, 1));
12247 ExtNode =
12248 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12249 Values.push_back(SDValue(ExtNode, 0));
12250 Values.push_back(SDValue(ExtNode, 1));
12251 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12252 Values.push_back(SDValue(ExtNode, 0));
12253 Values.push_back(SDValue(ExtNode, 1));
12254 ExtNode =
12255 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12256 Values.push_back(SDValue(ExtNode, 0));
12257 Values.push_back(SDValue(ExtNode, 1));
12258 }
12259
12260 if (Subtarget.isLittleEndian())
12261 std::reverse(Values.begin(), Values.end());
12262
12263 SDVTList Tys = DAG.getVTList(MVT::Other);
12265 StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12266 Values[0], BasePtr};
12267 MachineMemOperand *MMO = SN->getMemOperand();
12268 unsigned NumVecs = VT.getSizeInBits() / 256;
12269 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12270 MachineMemOperand *NewMMO =
12271 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12272 if (Idx > 0) {
12273 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12274 DAG.getConstant(32, dl, BasePtr.getValueType()));
12275 Ops[3] = BasePtr;
12276 }
12277 Ops[2] = Values[Idx];
12279 MVT::v256i1, NewMMO);
12280 Stores.push_back(St);
12281 }
12282
12283 SDValue TF = DAG.getTokenFactor(dl, Stores);
12284 return TF;
12285}
12286
12287SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12288 SelectionDAG &DAG) const {
12289 SDLoc dl(Op);
12290 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12291 SDValue StoreChain = SN->getChain();
12292 SDValue BasePtr = SN->getBasePtr();
12293 SDValue Value = SN->getValue();
12294 SDValue Value2 = SN->getValue();
12295 EVT StoreVT = Value.getValueType();
12296
12297 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12298 return LowerDMFVectorStore(Op, DAG);
12299
12300 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12301 return Op;
12302
12303 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12304 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12305 "Type unsupported without MMA");
12306 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12307 "Type unsupported without paired vector support");
12308
12309 // For v256i1 on ISA Future, let the store go through to instruction selection
12310 // where it will be matched to stxvp/pstxvp by the instruction patterns.
12311 if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
12313 return Op;
12314
12315 // For other cases, create 2 or 4 v16i8 stores to store the pair or
12316 // accumulator underlying registers individually.
12317 Align Alignment = SN->getAlign();
12319 unsigned NumVecs = 2;
12320 if (StoreVT == MVT::v512i1) {
12321 if (Subtarget.isISAFuture()) {
12322 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12323 MachineSDNode *ExtNode = DAG.getMachineNode(
12324 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
12325
12326 Value = SDValue(ExtNode, 0);
12327 Value2 = SDValue(ExtNode, 1);
12328 } else
12329 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
12330 NumVecs = 4;
12331 }
12332 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12333 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12334 SDValue Elt;
12335 if (Subtarget.isISAFuture()) {
12336 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12337 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
12338 Idx > 1 ? Value2 : Value,
12339 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12340 } else
12341 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
12342 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12343
12344 SDValue Store =
12345 DAG.getStore(StoreChain, dl, Elt, BasePtr,
12346 SN->getPointerInfo().getWithOffset(Idx * 16),
12347 commonAlignment(Alignment, Idx * 16),
12348 SN->getMemOperand()->getFlags(), SN->getAAInfo());
12349 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12350 DAG.getConstant(16, dl, BasePtr.getValueType()));
12351 Stores.push_back(Store);
12352 }
12353 SDValue TF = DAG.getTokenFactor(dl, Stores);
12354 return TF;
12355}
12356
/// Custom-lower ISD::MUL for vector types with no single PPC multiply
/// instruction: v4i32 is synthesized from halfword multiplies plus a
/// multiply-sum, and v16i8 from even/odd byte multiplies merged by a
/// shuffle. Any other type reaching here is a lowering bug.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
    // +16 as shift amt: vrlw/vslw use only the low 5 bits of each count
    // element, so the -16 splat acts as a rotate/shift by 16.
    SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16 so the halfword multiply intrinsics apply.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                      LHS, RHS, DAG, dl, MVT::v4i32);

    // vmsumuhm on LHS and the half-swapped RHS accumulates the two cross
    // products (lo*hi + hi*lo) per 32-bit lane.
    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                         LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                        LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        // Keep the low byte of each 16-bit product (LE: byte 0 of the lane).
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        // Keep the low byte of each 16-bit product (BE: byte 1 of the lane).
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
12420
12421SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12422 bool IsStrict = Op->isStrictFPOpcode();
12423 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12424 !Subtarget.hasP9Vector())
12425 return SDValue();
12426
12427 return Op;
12428}
12429
12430// Custom lowering for fpext vf32 to v2f64
12431SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12432
12433 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12434 "Should only be called for ISD::FP_EXTEND");
12435
12436 // FIXME: handle extends from half precision float vectors on P9.
12437 // We only want to custom lower an extend from v2f32 to v2f64.
12438 if (Op.getValueType() != MVT::v2f64 ||
12439 Op.getOperand(0).getValueType() != MVT::v2f32)
12440 return SDValue();
12441
12442 SDLoc dl(Op);
12443 SDValue Op0 = Op.getOperand(0);
12444
12445 switch (Op0.getOpcode()) {
12446 default:
12447 return SDValue();
12449 assert(Op0.getNumOperands() == 2 &&
12451 "Node should have 2 operands with second one being a constant!");
12452
12453 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
12454 return SDValue();
12455
12456 // Custom lower is only done for high or low doubleword.
12457 int Idx = Op0.getConstantOperandVal(1);
12458 if (Idx % 2 != 0)
12459 return SDValue();
12460
12461 // Since input is v4f32, at this point Idx is either 0 or 2.
12462 // Shift to get the doubleword position we want.
12463 int DWord = Idx >> 1;
12464
12465 // High and low word positions are different on little endian.
12466 if (Subtarget.isLittleEndian())
12467 DWord ^= 0x1;
12468
12469 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
12470 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
12471 }
12472 case ISD::FADD:
12473 case ISD::FMUL:
12474 case ISD::FSUB: {
12475 SDValue NewLoad[2];
12476 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12477 // Ensure both input are loads.
12478 SDValue LdOp = Op0.getOperand(i);
12479 if (LdOp.getOpcode() != ISD::LOAD)
12480 return SDValue();
12481 // Generate new load node.
12482 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12483 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12484 NewLoad[i] = DAG.getMemIntrinsicNode(
12485 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12486 LD->getMemoryVT(), LD->getMemOperand());
12487 }
12488 SDValue NewOp =
12489 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12490 NewLoad[1], Op0.getNode()->getFlags());
12491 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12492 DAG.getConstant(0, dl, MVT::i32));
12493 }
12494 case ISD::LOAD: {
12495 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12496 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12497 SDValue NewLd = DAG.getMemIntrinsicNode(
12498 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12499 LD->getMemoryVT(), LD->getMemOperand());
12500 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12501 DAG.getConstant(0, dl, MVT::i32));
12502 }
12503 }
12504 llvm_unreachable("ERROR:Should return for all cases within swtich.");
12505}
12506
12508 SelectionDAG &DAG,
12509 const PPCSubtarget &STI) {
12510 SDLoc DL(Value);
12511 if (STI.useCRBits())
12512 Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
12513 DAG.getConstant(1, DL, SumType),
12514 DAG.getConstant(0, DL, SumType));
12515 else
12516 Value = DAG.getZExtOrTrunc(Value, DL, SumType);
12517 SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
12518 Value, DAG.getAllOnesConstant(DL, SumType));
12519 return Sum.getValue(1);
12520}
12521
12523 EVT CarryType, SelectionDAG &DAG,
12524 const PPCSubtarget &STI) {
12525 SDLoc DL(Flag);
12526 SDValue Zero = DAG.getConstant(0, DL, SumType);
12527 SDValue Carry = DAG.getNode(
12528 PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
12529 if (STI.useCRBits())
12530 return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
12531 return DAG.getZExtOrTrunc(Carry, DL, CarryType);
12532}
12533
12534SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12535
12536 SDLoc DL(Op);
12537 SDNode *N = Op.getNode();
12538 EVT VT = N->getValueType(0);
12539 EVT CarryType = N->getValueType(1);
12540 unsigned Opc = N->getOpcode();
12541 bool IsAdd = Opc == ISD::UADDO;
12542 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12543 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12544 N->getOperand(0), N->getOperand(1));
12545 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12546 DAG, Subtarget);
12547 if (!IsAdd)
12548 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12549 DAG.getConstant(1UL, DL, CarryType));
12550 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12551}
12552
12553SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12554 SelectionDAG &DAG) const {
12555 SDLoc DL(Op);
12556 SDNode *N = Op.getNode();
12557 unsigned Opc = N->getOpcode();
12558 EVT VT = N->getValueType(0);
12559 EVT CarryType = N->getValueType(1);
12560 SDValue CarryOp = N->getOperand(2);
12561 bool IsAdd = Opc == ISD::UADDO_CARRY;
12562 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12563 if (!IsAdd)
12564 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12565 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12566 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12567 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12568 Op.getOperand(0), Op.getOperand(1), CarryOp);
12569 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12570 Subtarget);
12571 if (!IsAdd)
12572 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12573 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12574 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12575}
12576
12577SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12578
12579 SDLoc dl(Op);
12580 SDValue LHS = Op.getOperand(0);
12581 SDValue RHS = Op.getOperand(1);
12582 EVT VT = Op.getNode()->getValueType(0);
12583
12584 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12585
12586 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12587 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12588
12589 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12590
12591 SDValue Overflow =
12592 DAG.getNode(ISD::SRL, dl, VT, And,
12593 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12594
12595 SDValue OverflowTrunc =
12596 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12597
12598 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12599}
12600
12601/// Implements signed add with overflow detection using the rule:
12602/// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign
12603SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12604
12605 SDLoc dl(Op);
12606 SDValue LHS = Op.getOperand(0);
12607 SDValue RHS = Op.getOperand(1);
12608 EVT VT = Op.getNode()->getValueType(0);
12609
12610 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
12611
12612 // Compute ~(x xor y)
12613 SDValue XorXY = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12614 SDValue EqvXY = DAG.getNOT(dl, XorXY, VT);
12615 // Compute (s xor x)
12616 SDValue SumXorX = DAG.getNode(ISD::XOR, dl, VT, Sum, LHS);
12617
12618 // overflow = (x eqv y) & (s xor x)
12619 SDValue OverflowInSign = DAG.getNode(ISD::AND, dl, VT, EqvXY, SumXorX);
12620
12621 // Shift sign bit down to LSB
12622 SDValue Overflow =
12623 DAG.getNode(ISD::SRL, dl, VT, OverflowInSign,
12624 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12625 // Truncate to the overflow type (i1)
12626 SDValue OverflowTrunc =
12627 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12628
12629 return DAG.getMergeValues({Sum, OverflowTrunc}, dl);
12630}
12631
// Lower unsigned 3-way compare producing -1/0/1, i.e.
//   ucmp(A, B) = (A > B) - (A < B),
// computed branchlessly from a chain of carrying subtractions.
SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // Freeze both operands: each is used several times below, and a poison
  // input must not yield inconsistent values across those uses.
  SDValue A = DAG.getFreeze(Op.getOperand(0));
  SDValue B = DAG.getFreeze(Op.getOperand(1));
  EVT OpVT = A.getValueType();  // operand (comparison) type
  EVT ResVT = Op.getValueType(); // requested result type

  // On PPC64, i32 carries are affected by the upper 32 bits of the registers.
  // We must zero-extend to i64 to ensure the carry reflects the 32-bit unsigned
  // comparison.
  if (Subtarget.isPPC64() && OpVT == MVT::i32) {
    A = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, A);
    B = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, B);
    OpVT = MVT::i64;
  }

  // First compute diff = A - B.
  SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);

  // Generate B - A using SUBC to capture carry.
  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
  SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
  SDValue CA0 = SubC.getValue(1);

  // t2 = A - B + CA0 using SUBE.
  SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
  SDValue CA1 = SubE1.getValue(1);

  // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
  SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);

  // Extract the first result and truncate to result type if needed.
  // (SExt, not ZExt: the -1 case must stay negative in a wider result.)
  return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
}
12667
12668/// LowerOperation - Provide custom lowering hooks for some operations.
12669///
12671 switch (Op.getOpcode()) {
12672 default:
12673 llvm_unreachable("Wasn't expecting to be able to lower this!");
12674 case ISD::FPOW: return lowerPow(Op, DAG);
12675 case ISD::FSIN: return lowerSin(Op, DAG);
12676 case ISD::FCOS: return lowerCos(Op, DAG);
12677 case ISD::FLOG: return lowerLog(Op, DAG);
12678 case ISD::FLOG10: return lowerLog10(Op, DAG);
12679 case ISD::FEXP: return lowerExp(Op, DAG);
12680 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12681 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12682 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12683 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12684 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12685 case ISD::STRICT_FSETCC:
12687 case ISD::SETCC: return LowerSETCC(Op, DAG);
12688 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12689 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12690 case ISD::SSUBO:
12691 return LowerSSUBO(Op, DAG);
12692 case ISD::SADDO:
12693 return LowerSADDO(Op, DAG);
12694
12695 case ISD::INLINEASM:
12696 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12697 // Variable argument lowering.
12698 case ISD::VASTART: return LowerVASTART(Op, DAG);
12699 case ISD::VAARG: return LowerVAARG(Op, DAG);
12700 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12701
12702 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12703 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12705 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12706
12707 // Exception handling lowering.
12708 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12709 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12710 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12711
12712 case ISD::LOAD: return LowerLOAD(Op, DAG);
12713 case ISD::STORE: return LowerSTORE(Op, DAG);
12714 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12715 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12718 case ISD::FP_TO_UINT:
12719 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12722 case ISD::UINT_TO_FP:
12723 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12724 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12725 case ISD::SET_ROUNDING:
12726 return LowerSET_ROUNDING(Op, DAG);
12727
12728 // Lower 64-bit shifts.
12729 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12730 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12731 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12732
12733 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12734 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12735
12736 // Vector-related lowering.
12737 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12738 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12739 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12740 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12741 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12742 case ISD::MUL: return LowerMUL(Op, DAG);
12743 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12745 case ISD::FP_ROUND:
12746 return LowerFP_ROUND(Op, DAG);
12747 case ISD::ROTL: return LowerROTL(Op, DAG);
12748
12749 // For counter-based loop handling.
12751 return SDValue();
12752
12753 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12754
12755 // Frame & Return address.
12756 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12757 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12758
12760 return LowerINTRINSIC_VOID(Op, DAG);
12761 case ISD::BSWAP:
12762 return LowerBSWAP(Op, DAG);
12764 return LowerATOMIC_CMP_SWAP(Op, DAG);
12765 case ISD::ATOMIC_STORE:
12766 return LowerATOMIC_LOAD_STORE(Op, DAG);
12767 case ISD::IS_FPCLASS:
12768 return LowerIS_FPCLASS(Op, DAG);
12769 case ISD::UADDO:
12770 case ISD::USUBO:
12771 return LowerADDSUBO(Op, DAG);
12772 case ISD::UADDO_CARRY:
12773 case ISD::USUBO_CARRY:
12774 return LowerADDSUBO_CARRY(Op, DAG);
12775 case ISD::UCMP:
12776 return LowerUCMP(Op, DAG);
12777 case ISD::STRICT_LRINT:
12778 case ISD::STRICT_LLRINT:
12779 case ISD::STRICT_LROUND:
12782 if (Op->getFlags().hasNoFPExcept())
12783 return Op;
12784 return SDValue();
12785 case ISD::VP_LOAD:
12786 return LowerVP_LOAD(Op, DAG);
12787 case ISD::VP_STORE:
12788 return LowerVP_STORE(Op, DAG);
12789 }
12790}
12791
12794 SelectionDAG &DAG) const {
12795 SDLoc dl(N);
12796 switch (N->getOpcode()) {
12797 default:
12798 llvm_unreachable("Do not know how to custom type legalize this operation!");
12799 case ISD::ATOMIC_LOAD: {
12800 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12801 Results.push_back(Res);
12802 Results.push_back(Res.getValue(1));
12803 break;
12804 }
12805 case ISD::READCYCLECOUNTER: {
12806 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12807 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12808
12809 Results.push_back(
12810 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12811 Results.push_back(RTB.getValue(2));
12812 break;
12813 }
12815 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12816 break;
12817
12818 assert(N->getValueType(0) == MVT::i1 &&
12819 "Unexpected result type for CTR decrement intrinsic");
12820 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12821 N->getValueType(0));
12822 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12823 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12824 N->getOperand(1));
12825
12826 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12827 Results.push_back(NewInt.getValue(1));
12828 break;
12829 }
12831 switch (N->getConstantOperandVal(0)) {
12832 case Intrinsic::ppc_pack_longdouble:
12833 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12834 N->getOperand(2), N->getOperand(1)));
12835 break;
12836 case Intrinsic::ppc_maxfe:
12837 case Intrinsic::ppc_minfe:
12838 case Intrinsic::ppc_fnmsub:
12839 case Intrinsic::ppc_convert_f128_to_ppcf128:
12840 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12841 break;
12842 }
12843 break;
12844 }
12845 case ISD::VAARG: {
12846 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12847 return;
12848
12849 EVT VT = N->getValueType(0);
12850
12851 if (VT == MVT::i64) {
12852 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12853
12854 Results.push_back(NewNode);
12855 Results.push_back(NewNode.getValue(1));
12856 }
12857 return;
12858 }
12861 case ISD::FP_TO_SINT:
12862 case ISD::FP_TO_UINT: {
12863 // LowerFP_TO_INT() can only handle f32 and f64.
12864 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12865 MVT::ppcf128)
12866 return;
12867 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12868 Results.push_back(LoweredValue);
12869 if (N->isStrictFPOpcode())
12870 Results.push_back(LoweredValue.getValue(1));
12871 return;
12872 }
12873 case ISD::TRUNCATE: {
12874 if (!N->getValueType(0).isVector())
12875 return;
12876 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12877 if (Lowered)
12878 Results.push_back(Lowered);
12879 return;
12880 }
12881 case ISD::SCALAR_TO_VECTOR: {
12882 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12883 if (Lowered)
12884 Results.push_back(Lowered);
12885 return;
12886 }
12887 case ISD::FSHL:
12888 case ISD::FSHR:
12889 // Don't handle funnel shifts here.
12890 return;
12891 case ISD::BITCAST:
12892 // Don't handle bitcast here.
12893 return;
12894 case ISD::FP_EXTEND:
12895 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12896 if (Lowered)
12897 Results.push_back(Lowered);
12898 return;
12899 }
12900}
12901
12902//===----------------------------------------------------------------------===//
12903// Other Lowering Code
12904//===----------------------------------------------------------------------===//
12905
12907 return Builder.CreateIntrinsic(Id, {});
12908}
12909
12911 Value *Addr,
12912 AtomicOrdering Ord) const {
12913 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12914
12915 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12916 "Only 8/16/32/64-bit atomic loads supported");
12917 Intrinsic::ID IntID;
12918 switch (SZ) {
12919 default:
12920 llvm_unreachable("Unexpected PrimitiveSize");
12921 case 8:
12922 IntID = Intrinsic::ppc_lbarx;
12923 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12924 break;
12925 case 16:
12926 IntID = Intrinsic::ppc_lharx;
12927 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12928 break;
12929 case 32:
12930 IntID = Intrinsic::ppc_lwarx;
12931 break;
12932 case 64:
12933 IntID = Intrinsic::ppc_ldarx;
12934 break;
12935 }
12936 Value *Call =
12937 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
12938
12939 return Builder.CreateTruncOrBitCast(Call, ValueTy);
12940}
12941
12942// Perform a store-conditional operation to Addr. Return the status of the
12943// store. This should be 0 if the store succeeded, non-zero otherwise.
12945 Value *Val, Value *Addr,
12946 AtomicOrdering Ord) const {
12947 Type *Ty = Val->getType();
12948 unsigned SZ = Ty->getPrimitiveSizeInBits();
12949
12950 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12951 "Only 8/16/32/64-bit atomic loads supported");
12952 Intrinsic::ID IntID;
12953 switch (SZ) {
12954 default:
12955 llvm_unreachable("Unexpected PrimitiveSize");
12956 case 8:
12957 IntID = Intrinsic::ppc_stbcx;
12958 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12959 break;
12960 case 16:
12961 IntID = Intrinsic::ppc_sthcx;
12962 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12963 break;
12964 case 32:
12965 IntID = Intrinsic::ppc_stwcx;
12966 break;
12967 case 64:
12968 IntID = Intrinsic::ppc_stdcx;
12969 break;
12970 }
12971
12972 if (SZ == 8 || SZ == 16)
12973 Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
12974
12975 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
12976 /*FMFSource=*/nullptr, "stcx");
12977 return Builder.CreateXor(Call, Builder.getInt32(1));
12978}
12979
12980// The mappings for emitLeading/TrailingFence is taken from
12981// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12983 Instruction *Inst,
12984 AtomicOrdering Ord) const {
12986 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12987 if (isReleaseOrStronger(Ord))
12988 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12989 return nullptr;
12990}
12991
12993 Instruction *Inst,
12994 AtomicOrdering Ord) const {
12995 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12996 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12997 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12998 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12999 if (isa<LoadInst>(Inst))
13000 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
13001 {Inst});
13002 // FIXME: Can use isync for rmw operation.
13003 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
13004 }
13005 return nullptr;
13006}
13007
13010 unsigned AtomicSize,
13011 unsigned BinOpcode,
13012 unsigned CmpOpcode,
13013 unsigned CmpPred) const {
13014 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13015 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13016
13017 auto LoadMnemonic = PPC::LDARX;
13018 auto StoreMnemonic = PPC::STDCX;
13019 switch (AtomicSize) {
13020 default:
13021 llvm_unreachable("Unexpected size of atomic entity");
13022 case 1:
13023 LoadMnemonic = PPC::LBARX;
13024 StoreMnemonic = PPC::STBCX;
13025 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
13026 break;
13027 case 2:
13028 LoadMnemonic = PPC::LHARX;
13029 StoreMnemonic = PPC::STHCX;
13030 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
13031 break;
13032 case 4:
13033 LoadMnemonic = PPC::LWARX;
13034 StoreMnemonic = PPC::STWCX;
13035 break;
13036 case 8:
13037 LoadMnemonic = PPC::LDARX;
13038 StoreMnemonic = PPC::STDCX;
13039 break;
13040 }
13041
13042 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13043 MachineFunction *F = BB->getParent();
13045
13046 Register dest = MI.getOperand(0).getReg();
13047 Register ptrA = MI.getOperand(1).getReg();
13048 Register ptrB = MI.getOperand(2).getReg();
13049 Register incr = MI.getOperand(3).getReg();
13050 DebugLoc dl = MI.getDebugLoc();
13051
13052 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13053 MachineBasicBlock *loop2MBB =
13054 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13055 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13056 F->insert(It, loopMBB);
13057 if (CmpOpcode)
13058 F->insert(It, loop2MBB);
13059 F->insert(It, exitMBB);
13060 exitMBB->splice(exitMBB->begin(), BB,
13061 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13063
13064 MachineRegisterInfo &RegInfo = F->getRegInfo();
13065 Register TmpReg = (!BinOpcode) ? incr :
13066 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
13067 : &PPC::GPRCRegClass);
13068
13069 // thisMBB:
13070 // ...
13071 // fallthrough --> loopMBB
13072 BB->addSuccessor(loopMBB);
13073
13074 // loopMBB:
13075 // l[wd]arx dest, ptr
13076 // add r0, dest, incr
13077 // st[wd]cx. r0, ptr
13078 // bne- loopMBB
13079 // fallthrough --> exitMBB
13080
13081 // For max/min...
13082 // loopMBB:
13083 // l[wd]arx dest, ptr
13084 // cmpl?[wd] dest, incr
13085 // bgt exitMBB
13086 // loop2MBB:
13087 // st[wd]cx. dest, ptr
13088 // bne- loopMBB
13089 // fallthrough --> exitMBB
13090
13091 BB = loopMBB;
13092 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
13093 .addReg(ptrA).addReg(ptrB);
13094 if (BinOpcode)
13095 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
13096 if (CmpOpcode) {
13097 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13098 // Signed comparisons of byte or halfword values must be sign-extended.
13099 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13100 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13101 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13102 ExtReg).addReg(dest);
13103 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
13104 } else
13105 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
13106
13107 BuildMI(BB, dl, TII->get(PPC::BCC))
13108 .addImm(CmpPred)
13109 .addReg(CrReg)
13110 .addMBB(exitMBB);
13111 BB->addSuccessor(loop2MBB);
13112 BB->addSuccessor(exitMBB);
13113 BB = loop2MBB;
13114 }
13115 BuildMI(BB, dl, TII->get(StoreMnemonic))
13116 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
13117 BuildMI(BB, dl, TII->get(PPC::BCC))
13119 .addReg(PPC::CR0)
13120 .addMBB(loopMBB);
13121 BB->addSuccessor(loopMBB);
13122 BB->addSuccessor(exitMBB);
13123
13124 // exitMBB:
13125 // ...
13126 BB = exitMBB;
13127 return BB;
13128}
13129
13131 switch(MI.getOpcode()) {
13132 default:
13133 return false;
13134 case PPC::COPY:
13135 return TII->isSignExtended(MI.getOperand(1).getReg(),
13136 &MI.getMF()->getRegInfo());
13137 case PPC::LHA:
13138 case PPC::LHA8:
13139 case PPC::LHAU:
13140 case PPC::LHAU8:
13141 case PPC::LHAUX:
13142 case PPC::LHAUX8:
13143 case PPC::LHAX:
13144 case PPC::LHAX8:
13145 case PPC::LWA:
13146 case PPC::LWAUX:
13147 case PPC::LWAX:
13148 case PPC::LWAX_32:
13149 case PPC::LWA_32:
13150 case PPC::PLHA:
13151 case PPC::PLHA8:
13152 case PPC::PLHA8pc:
13153 case PPC::PLHApc:
13154 case PPC::PLWA:
13155 case PPC::PLWA8:
13156 case PPC::PLWA8pc:
13157 case PPC::PLWApc:
13158 case PPC::EXTSB:
13159 case PPC::EXTSB8:
13160 case PPC::EXTSB8_32_64:
13161 case PPC::EXTSB8_rec:
13162 case PPC::EXTSB_rec:
13163 case PPC::EXTSH:
13164 case PPC::EXTSH8:
13165 case PPC::EXTSH8_32_64:
13166 case PPC::EXTSH8_rec:
13167 case PPC::EXTSH_rec:
13168 case PPC::EXTSW:
13169 case PPC::EXTSWSLI:
13170 case PPC::EXTSWSLI_32_64:
13171 case PPC::EXTSWSLI_32_64_rec:
13172 case PPC::EXTSWSLI_rec:
13173 case PPC::EXTSW_32:
13174 case PPC::EXTSW_32_64:
13175 case PPC::EXTSW_32_64_rec:
13176 case PPC::EXTSW_rec:
13177 case PPC::SRAW:
13178 case PPC::SRAWI:
13179 case PPC::SRAWI_rec:
13180 case PPC::SRAW_rec:
13181 return true;
13182 }
13183 return false;
13184}
13185
13188 bool is8bit, // operation
13189 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
13190 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13191 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13192
13193 // If this is a signed comparison and the value being compared is not known
13194 // to be sign extended, sign extend it here.
13195 DebugLoc dl = MI.getDebugLoc();
13196 MachineFunction *F = BB->getParent();
13197 MachineRegisterInfo &RegInfo = F->getRegInfo();
13198 Register incr = MI.getOperand(3).getReg();
13199 bool IsSignExtended =
13200 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
13201
13202 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
13203 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13204 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13205 .addReg(MI.getOperand(3).getReg());
13206 MI.getOperand(3).setReg(ValueReg);
13207 incr = ValueReg;
13208 }
13209 // If we support part-word atomic mnemonics, just use them
13210 if (Subtarget.hasPartwordAtomics())
13211 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
13212 CmpPred);
13213
13214 // In 64 bit mode we have to use 64 bits for addresses, even though the
13215 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
13216 // registers without caring whether they're 32 or 64, but here we're
13217 // doing actual arithmetic on the addresses.
13218 bool is64bit = Subtarget.isPPC64();
13219 bool isLittleEndian = Subtarget.isLittleEndian();
13220 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13221
13222 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13224
13225 Register dest = MI.getOperand(0).getReg();
13226 Register ptrA = MI.getOperand(1).getReg();
13227 Register ptrB = MI.getOperand(2).getReg();
13228
13229 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13230 MachineBasicBlock *loop2MBB =
13231 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13232 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13233 F->insert(It, loopMBB);
13234 if (CmpOpcode)
13235 F->insert(It, loop2MBB);
13236 F->insert(It, exitMBB);
13237 exitMBB->splice(exitMBB->begin(), BB,
13238 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13240
13241 const TargetRegisterClass *RC =
13242 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13243 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13244
13245 Register PtrReg = RegInfo.createVirtualRegister(RC);
13246 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13247 Register ShiftReg =
13248 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13249 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13250 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13251 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13252 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13253 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13254 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13255 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13256 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13257 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13258 Register Ptr1Reg;
13259 Register TmpReg =
13260 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13261
13262 // thisMBB:
13263 // ...
13264 // fallthrough --> loopMBB
13265 BB->addSuccessor(loopMBB);
13266
13267 // The 4-byte load must be aligned, while a char or short may be
13268 // anywhere in the word. Hence all this nasty bookkeeping code.
13269 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13270 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13271 // xori shift, shift1, 24 [16]
13272 // rlwinm ptr, ptr1, 0, 0, 29
13273 // slw incr2, incr, shift
13274 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13275 // slw mask, mask2, shift
13276 // loopMBB:
13277 // lwarx tmpDest, ptr
13278 // add tmp, tmpDest, incr2
13279 // andc tmp2, tmpDest, mask
13280 // and tmp3, tmp, mask
13281 // or tmp4, tmp3, tmp2
13282 // stwcx. tmp4, ptr
13283 // bne- loopMBB
13284 // fallthrough --> exitMBB
13285 // srw SrwDest, tmpDest, shift
13286 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
13287 if (ptrA != ZeroReg) {
13288 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13289 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13290 .addReg(ptrA)
13291 .addReg(ptrB);
13292 } else {
13293 Ptr1Reg = ptrB;
13294 }
13295 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
13296 // mode.
13297 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13298 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
13299 .addImm(3)
13300 .addImm(27)
13301 .addImm(is8bit ? 28 : 27);
13302 if (!isLittleEndian)
13303 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13304 .addReg(Shift1Reg)
13305 .addImm(is8bit ? 24 : 16);
13306 if (is64bit)
13307 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13308 .addReg(Ptr1Reg)
13309 .addImm(0)
13310 .addImm(61);
13311 else
13312 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13313 .addReg(Ptr1Reg)
13314 .addImm(0)
13315 .addImm(0)
13316 .addImm(29);
13317 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13318 if (is8bit)
13319 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13320 else {
13321 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13322 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13323 .addReg(Mask3Reg)
13324 .addImm(65535);
13325 }
13326 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13327 .addReg(Mask2Reg)
13328 .addReg(ShiftReg);
13329
13330 BB = loopMBB;
13331 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13332 .addReg(ZeroReg)
13333 .addReg(PtrReg);
13334 if (BinOpcode)
13335 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13336 .addReg(Incr2Reg)
13337 .addReg(TmpDestReg);
13338 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13339 .addReg(TmpDestReg)
13340 .addReg(MaskReg);
13341 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13342 if (CmpOpcode) {
13343 // For unsigned comparisons, we can directly compare the shifted values.
13344 // For signed comparisons we shift and sign extend.
13345 Register SReg = RegInfo.createVirtualRegister(GPRC);
13346 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13347 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13348 .addReg(TmpDestReg)
13349 .addReg(MaskReg);
13350 unsigned ValueReg = SReg;
13351 unsigned CmpReg = Incr2Reg;
13352 if (CmpOpcode == PPC::CMPW) {
13353 ValueReg = RegInfo.createVirtualRegister(GPRC);
13354 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13355 .addReg(SReg)
13356 .addReg(ShiftReg);
13357 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13358 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13359 .addReg(ValueReg);
13360 ValueReg = ValueSReg;
13361 CmpReg = incr;
13362 }
13363 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13364 BuildMI(BB, dl, TII->get(PPC::BCC))
13365 .addImm(CmpPred)
13366 .addReg(CrReg)
13367 .addMBB(exitMBB);
13368 BB->addSuccessor(loop2MBB);
13369 BB->addSuccessor(exitMBB);
13370 BB = loop2MBB;
13371 }
13372 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13373 BuildMI(BB, dl, TII->get(PPC::STWCX))
13374 .addReg(Tmp4Reg)
13375 .addReg(ZeroReg)
13376 .addReg(PtrReg);
13377 BuildMI(BB, dl, TII->get(PPC::BCC))
13379 .addReg(PPC::CR0)
13380 .addMBB(loopMBB);
13381 BB->addSuccessor(loopMBB);
13382 BB->addSuccessor(exitMBB);
13383
13384 // exitMBB:
13385 // ...
13386 BB = exitMBB;
13387 // Since the shift amount is not a constant, we need to clear
13388 // the upper bits with a separate RLWINM.
13389 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13390 .addReg(SrwDestReg)
13391 .addImm(0)
13392 .addImm(is8bit ? 24 : 16)
13393 .addImm(31);
13394 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13395 .addReg(TmpDestReg)
13396 .addReg(ShiftReg);
13397 return BB;
13398}
13399
13402 MachineBasicBlock *MBB) const {
13403 DebugLoc DL = MI.getDebugLoc();
13404 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13405 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13406
13407 MachineFunction *MF = MBB->getParent();
13408 MachineRegisterInfo &MRI = MF->getRegInfo();
13409
13410 const BasicBlock *BB = MBB->getBasicBlock();
13411 MachineFunction::iterator I = ++MBB->getIterator();
13412
13413 Register DstReg = MI.getOperand(0).getReg();
13414 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13415 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13416 Register mainDstReg = MRI.createVirtualRegister(RC);
13417 Register restoreDstReg = MRI.createVirtualRegister(RC);
13418
13419 MVT PVT = getPointerTy(MF->getDataLayout());
13420 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13421 "Invalid Pointer Size!");
13422 // For v = setjmp(buf), we generate
13423 //
13424 // thisMBB:
13425 // SjLjSetup mainMBB
13426 // bl mainMBB
13427 // v_restore = 1
13428 // b sinkMBB
13429 //
13430 // mainMBB:
13431 // buf[LabelOffset] = LR
13432 // v_main = 0
13433 //
13434 // sinkMBB:
13435 // v = phi(main, restore)
13436 //
13437
13438 MachineBasicBlock *thisMBB = MBB;
13439 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13440 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13441 MF->insert(I, mainMBB);
13442 MF->insert(I, sinkMBB);
13443
13445
13446 // Transfer the remainder of BB and its successor edges to sinkMBB.
13447 sinkMBB->splice(sinkMBB->begin(), MBB,
13448 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13450
13451 // Note that the structure of the jmp_buf used here is not compatible
13452 // with that used by libc, and is not designed to be. Specifically, it
13453 // stores only those 'reserved' registers that LLVM does not otherwise
13454 // understand how to spill. Also, by convention, by the time this
13455 // intrinsic is called, Clang has already stored the frame address in the
13456 // first slot of the buffer and stack address in the third. Following the
13457 // X86 target code, we'll store the jump address in the second slot. We also
13458 // need to save the TOC pointer (R2) to handle jumps between shared
13459 // libraries, and that will be stored in the fourth slot. The thread
13460 // identifier (R13) is not affected.
13461
13462 // thisMBB:
13463 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13464 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13465 const int64_t BPOffset = 4 * PVT.getStoreSize();
13466
13467 // Prepare IP either in reg.
13468 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13469 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13470 Register BufReg = MI.getOperand(1).getReg();
13471
13472 if (Subtarget.is64BitELFABI()) {
13473 setUsesTOCBasePtr(*MBB->getParent());
13474 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13475 .addReg(PPC::X2)
13476 .addImm(TOCOffset)
13477 .addReg(BufReg)
13478 .cloneMemRefs(MI);
13479 }
13480
13481 // Naked functions never have a base pointer, and so we use r1. For all
13482 // other functions, this decision must be delayed until during PEI.
13483 unsigned BaseReg;
13484 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13485 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13486 else
13487 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13488
13489 MIB = BuildMI(*thisMBB, MI, DL,
13490 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13491 .addReg(BaseReg)
13492 .addImm(BPOffset)
13493 .addReg(BufReg)
13494 .cloneMemRefs(MI);
13495
13496 // Setup
13497 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13498 MIB.addRegMask(TRI->getNoPreservedMask());
13499
13500 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13501
13502 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13503 .addMBB(mainMBB);
13504 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13505
13506 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13507 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13508
13509 // mainMBB:
13510 // mainDstReg = 0
13511 MIB =
13512 BuildMI(mainMBB, DL,
13513 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13514
13515 // Store IP
13516 if (Subtarget.isPPC64()) {
13517 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13518 .addReg(LabelReg)
13519 .addImm(LabelOffset)
13520 .addReg(BufReg);
13521 } else {
13522 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13523 .addReg(LabelReg)
13524 .addImm(LabelOffset)
13525 .addReg(BufReg);
13526 }
13527 MIB.cloneMemRefs(MI);
13528
13529 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13530 mainMBB->addSuccessor(sinkMBB);
13531
13532 // sinkMBB:
13533 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13534 TII->get(PPC::PHI), DstReg)
13535 .addReg(mainDstReg).addMBB(mainMBB)
13536 .addReg(restoreDstReg).addMBB(thisMBB);
13537
13538 MI.eraseFromParent();
13539 return sinkMBB;
13540}
13541
13544 MachineBasicBlock *MBB) const {
13545 DebugLoc DL = MI.getDebugLoc();
13546 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13547
13548 MachineFunction *MF = MBB->getParent();
13549 MachineRegisterInfo &MRI = MF->getRegInfo();
13550
13551 MVT PVT = getPointerTy(MF->getDataLayout());
13552 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13553 "Invalid Pointer Size!");
13554
13555 const TargetRegisterClass *RC =
13556 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13557 Register Tmp = MRI.createVirtualRegister(RC);
13558 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13559 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13560 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13561 unsigned BP =
13562 (PVT == MVT::i64)
13563 ? PPC::X30
13564 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13565 : PPC::R30);
13566
13568
13569 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13570 const int64_t SPOffset = 2 * PVT.getStoreSize();
13571 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13572 const int64_t BPOffset = 4 * PVT.getStoreSize();
13573
13574 Register BufReg = MI.getOperand(0).getReg();
13575
13576 // Reload FP (the jumped-to function may not have had a
13577 // frame pointer, and if so, then its r31 will be restored
13578 // as necessary).
13579 if (PVT == MVT::i64) {
13580 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13581 .addImm(0)
13582 .addReg(BufReg);
13583 } else {
13584 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13585 .addImm(0)
13586 .addReg(BufReg);
13587 }
13588 MIB.cloneMemRefs(MI);
13589
13590 // Reload IP
13591 if (PVT == MVT::i64) {
13592 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13593 .addImm(LabelOffset)
13594 .addReg(BufReg);
13595 } else {
13596 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13597 .addImm(LabelOffset)
13598 .addReg(BufReg);
13599 }
13600 MIB.cloneMemRefs(MI);
13601
13602 // Reload SP
13603 if (PVT == MVT::i64) {
13604 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13605 .addImm(SPOffset)
13606 .addReg(BufReg);
13607 } else {
13608 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13609 .addImm(SPOffset)
13610 .addReg(BufReg);
13611 }
13612 MIB.cloneMemRefs(MI);
13613
13614 // Reload BP
13615 if (PVT == MVT::i64) {
13616 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13617 .addImm(BPOffset)
13618 .addReg(BufReg);
13619 } else {
13620 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13621 .addImm(BPOffset)
13622 .addReg(BufReg);
13623 }
13624 MIB.cloneMemRefs(MI);
13625
13626 // Reload TOC
13627 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13628 setUsesTOCBasePtr(*MBB->getParent());
13629 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13630 .addImm(TOCOffset)
13631 .addReg(BufReg)
13632 .cloneMemRefs(MI);
13633 }
13634
13635 // Jump
13636 BuildMI(*MBB, MI, DL,
13637 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13638 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13639
13640 MI.eraseFromParent();
13641 return MBB;
13642}
13643
13645 // If the function specifically requests inline stack probes, emit them.
13646 if (MF.getFunction().hasFnAttribute("probe-stack"))
13647 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13648 "inline-asm";
13649 return false;
13650}
13651
13653 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13654 unsigned StackAlign = TFI->getStackAlignment();
13655 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13656 "Unexpected stack alignment");
13657 // The default stack probe size is 4096 if the function has no
13658 // stack-probe-size attribute.
13659 const Function &Fn = MF.getFunction();
13660 unsigned StackProbeSize =
13661 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13662 // Round down to the stack alignment.
13663 StackProbeSize &= ~(StackAlign - 1);
13664 return StackProbeSize ? StackProbeSize : StackAlign;
13665}
13666
13667// Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
13668// into three phases. In the first phase, it uses pseudo instruction
13669// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
13670// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
13671// At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
13672// MaxCallFrameSize so that it can calculate correct data area pointer.
13675 MachineBasicBlock *MBB) const {
13676 const bool isPPC64 = Subtarget.isPPC64();
13677 MachineFunction *MF = MBB->getParent();
13678 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13679 DebugLoc DL = MI.getDebugLoc();
13680 const unsigned ProbeSize = getStackProbeSize(*MF);
13681 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13682 MachineRegisterInfo &MRI = MF->getRegInfo();
13683 // The CFG of probing stack looks as
13684 // +-----+
13685 // | MBB |
13686 // +--+--+
13687 // |
13688 // +----v----+
13689 // +--->+ TestMBB +---+
13690 // | +----+----+ |
13691 // | | |
13692 // | +-----v----+ |
13693 // +---+ BlockMBB | |
13694 // +----------+ |
13695 // |
13696 // +---------+ |
13697 // | TailMBB +<--+
13698 // +---------+
13699 // In MBB, calculate previous frame pointer and final stack pointer.
13700 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13701 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13702 // TailMBB is spliced via \p MI.
13703 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13704 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13705 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13706
13707 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13708 MF->insert(MBBIter, TestMBB);
13709 MF->insert(MBBIter, BlockMBB);
13710 MF->insert(MBBIter, TailMBB);
13711
13712 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13713 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13714
13715 Register DstReg = MI.getOperand(0).getReg();
13716 Register NegSizeReg = MI.getOperand(1).getReg();
13717 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13718 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13719 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13720 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13721
13722 // Since value of NegSizeReg might be realigned in prologepilog, insert a
13723 // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
13724 // NegSize.
13725 unsigned ProbeOpc;
13726 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13727 ProbeOpc =
13728 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13729 else
13730 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
13731 // and NegSizeReg will be allocated in the same phyreg to avoid
13732 // redundant copy when NegSizeReg has only one use which is current MI and
13733 // will be replaced by PREPARE_PROBED_ALLOCA then.
13734 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13735 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13736 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13737 .addDef(ActualNegSizeReg)
13738 .addReg(NegSizeReg)
13739 .add(MI.getOperand(2))
13740 .add(MI.getOperand(3));
13741
13742 // Calculate final stack pointer, which equals to SP + ActualNegSize.
13743 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13744 FinalStackPtr)
13745 .addReg(SPReg)
13746 .addReg(ActualNegSizeReg);
13747
13748 // Materialize a scratch register for update.
13749 int64_t NegProbeSize = -(int64_t)ProbeSize;
13750 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13751 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13752 if (!isInt<16>(NegProbeSize)) {
13753 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13754 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13755 .addImm(NegProbeSize >> 16);
13756 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13757 ScratchReg)
13758 .addReg(TempReg)
13759 .addImm(NegProbeSize & 0xFFFF);
13760 } else
13761 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13762 .addImm(NegProbeSize);
13763
13764 {
13765 // Probing leading residual part.
13766 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13767 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13768 .addReg(ActualNegSizeReg)
13769 .addReg(ScratchReg);
13770 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13771 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13772 .addReg(Div)
13773 .addReg(ScratchReg);
13774 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13775 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13776 .addReg(Mul)
13777 .addReg(ActualNegSizeReg);
13778 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13779 .addReg(FramePointer)
13780 .addReg(SPReg)
13781 .addReg(NegMod);
13782 }
13783
13784 {
13785 // Remaining part should be multiple of ProbeSize.
13786 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13787 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13788 .addReg(SPReg)
13789 .addReg(FinalStackPtr);
13790 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13792 .addReg(CmpResult)
13793 .addMBB(TailMBB);
13794 TestMBB->addSuccessor(BlockMBB);
13795 TestMBB->addSuccessor(TailMBB);
13796 }
13797
13798 {
13799 // Touch the block.
13800 // |P...|P...|P...
13801 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13802 .addReg(FramePointer)
13803 .addReg(SPReg)
13804 .addReg(ScratchReg);
13805 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13806 BlockMBB->addSuccessor(TestMBB);
13807 }
13808
13809 // Calculation of MaxCallFrameSize is deferred to prologepilog, use
13810 // DYNAREAOFFSET pseudo instruction to get the future result.
13811 Register MaxCallFrameSizeReg =
13812 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13813 BuildMI(TailMBB, DL,
13814 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13815 MaxCallFrameSizeReg)
13816 .add(MI.getOperand(2))
13817 .add(MI.getOperand(3));
13818 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13819 .addReg(SPReg)
13820 .addReg(MaxCallFrameSizeReg);
13821
13822 // Splice instructions after MI to TailMBB.
13823 TailMBB->splice(TailMBB->end(), MBB,
13824 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13826 MBB->addSuccessor(TestMBB);
13827
13828 // Delete the pseudo instruction.
13829 MI.eraseFromParent();
13830
13831 ++NumDynamicAllocaProbed;
13832 return TailMBB;
13833}
13834
13836 switch (MI.getOpcode()) {
13837 case PPC::SELECT_CC_I4:
13838 case PPC::SELECT_CC_I8:
13839 case PPC::SELECT_CC_F4:
13840 case PPC::SELECT_CC_F8:
13841 case PPC::SELECT_CC_F16:
13842 case PPC::SELECT_CC_VRRC:
13843 case PPC::SELECT_CC_VSFRC:
13844 case PPC::SELECT_CC_VSSRC:
13845 case PPC::SELECT_CC_VSRC:
13846 case PPC::SELECT_CC_SPE4:
13847 case PPC::SELECT_CC_SPE:
13848 return true;
13849 default:
13850 return false;
13851 }
13852}
13853
13854static bool IsSelect(MachineInstr &MI) {
13855 switch (MI.getOpcode()) {
13856 case PPC::SELECT_I4:
13857 case PPC::SELECT_I8:
13858 case PPC::SELECT_F4:
13859 case PPC::SELECT_F8:
13860 case PPC::SELECT_F16:
13861 case PPC::SELECT_SPE:
13862 case PPC::SELECT_SPE4:
13863 case PPC::SELECT_VRRC:
13864 case PPC::SELECT_VSFRC:
13865 case PPC::SELECT_VSSRC:
13866 case PPC::SELECT_VSRC:
13867 return true;
13868 default:
13869 return false;
13870 }
13871}
13872
13875 MachineBasicBlock *BB) const {
13876 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13877 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13878 if (Subtarget.is64BitELFABI() &&
13879 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13880 !Subtarget.isUsingPCRelativeCalls()) {
13881 // Call lowering should have added an r2 operand to indicate a dependence
13882 // on the TOC base pointer value. It can't however, because there is no
13883 // way to mark the dependence as implicit there, and so the stackmap code
13884 // will confuse it with a regular operand. Instead, add the dependence
13885 // here.
13886 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13887 }
13888
13889 return emitPatchPoint(MI, BB);
13890 }
13891
13892 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13893 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13894 return emitEHSjLjSetJmp(MI, BB);
13895 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13896 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13897 return emitEHSjLjLongJmp(MI, BB);
13898 }
13899
13900 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13901
13902 // To "insert" these instructions we actually have to insert their
13903 // control-flow patterns.
13904 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13906
13907 MachineFunction *F = BB->getParent();
13908 MachineRegisterInfo &MRI = F->getRegInfo();
13909
13910 if (Subtarget.hasISEL() &&
13911 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13912 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13913 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13915 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13916 MI.getOpcode() == PPC::SELECT_CC_I8)
13917 Cond.push_back(MI.getOperand(4));
13918 else
13920 Cond.push_back(MI.getOperand(1));
13921
13922 DebugLoc dl = MI.getDebugLoc();
13923 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13924 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13925 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13926 // The incoming instruction knows the destination vreg to set, the
13927 // condition code register to branch on, the true/false values to
13928 // select between, and a branch opcode to use.
13929
13930 // thisMBB:
13931 // ...
13932 // TrueVal = ...
13933 // cmpTY ccX, r1, r2
13934 // bCC sinkMBB
13935 // fallthrough --> copy0MBB
13936 MachineBasicBlock *thisMBB = BB;
13937 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13938 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13939 DebugLoc dl = MI.getDebugLoc();
13940 F->insert(It, copy0MBB);
13941 F->insert(It, sinkMBB);
13942
13943 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
13944 copy0MBB->addLiveIn(PPC::CARRY);
13945 sinkMBB->addLiveIn(PPC::CARRY);
13946 }
13947
13948 // Set the call frame size on entry to the new basic blocks.
13949 // See https://reviews.llvm.org/D156113.
13950 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13951 copy0MBB->setCallFrameSize(CallFrameSize);
13952 sinkMBB->setCallFrameSize(CallFrameSize);
13953
13954 // Transfer the remainder of BB and its successor edges to sinkMBB.
13955 sinkMBB->splice(sinkMBB->begin(), BB,
13956 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13958
13959 // Next, add the true and fallthrough blocks as its successors.
13960 BB->addSuccessor(copy0MBB);
13961 BB->addSuccessor(sinkMBB);
13962
13963 if (IsSelect(MI)) {
13964 BuildMI(BB, dl, TII->get(PPC::BC))
13965 .addReg(MI.getOperand(1).getReg())
13966 .addMBB(sinkMBB);
13967 } else {
13968 unsigned SelectPred = MI.getOperand(4).getImm();
13969 BuildMI(BB, dl, TII->get(PPC::BCC))
13970 .addImm(SelectPred)
13971 .addReg(MI.getOperand(1).getReg())
13972 .addMBB(sinkMBB);
13973 }
13974
13975 // copy0MBB:
13976 // %FalseValue = ...
13977 // # fallthrough to sinkMBB
13978 BB = copy0MBB;
13979
13980 // Update machine-CFG edges
13981 BB->addSuccessor(sinkMBB);
13982
13983 // sinkMBB:
13984 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13985 // ...
13986 BB = sinkMBB;
13987 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13988 .addReg(MI.getOperand(3).getReg())
13989 .addMBB(copy0MBB)
13990 .addReg(MI.getOperand(2).getReg())
13991 .addMBB(thisMBB);
13992 } else if (MI.getOpcode() == PPC::ReadTB) {
13993 // To read the 64-bit time-base register on a 32-bit target, we read the
13994 // two halves. Should the counter have wrapped while it was being read, we
13995 // need to try again.
13996 // ...
13997 // readLoop:
13998 // mfspr Rx,TBU # load from TBU
13999 // mfspr Ry,TB # load from TB
14000 // mfspr Rz,TBU # load from TBU
14001 // cmpw crX,Rx,Rz # check if 'old'='new'
14002 // bne readLoop # branch if they're not equal
14003 // ...
14004
14005 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
14006 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
14007 DebugLoc dl = MI.getDebugLoc();
14008 F->insert(It, readMBB);
14009 F->insert(It, sinkMBB);
14010
14011 // Transfer the remainder of BB and its successor edges to sinkMBB.
14012 sinkMBB->splice(sinkMBB->begin(), BB,
14013 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14015
14016 BB->addSuccessor(readMBB);
14017 BB = readMBB;
14018
14019 MachineRegisterInfo &RegInfo = F->getRegInfo();
14020 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
14021 Register LoReg = MI.getOperand(0).getReg();
14022 Register HiReg = MI.getOperand(1).getReg();
14023
14024 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
14025 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
14026 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
14027
14028 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14029
14030 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
14031 .addReg(HiReg)
14032 .addReg(ReadAgainReg);
14033 BuildMI(BB, dl, TII->get(PPC::BCC))
14035 .addReg(CmpReg)
14036 .addMBB(readMBB);
14037
14038 BB->addSuccessor(readMBB);
14039 BB->addSuccessor(sinkMBB);
14040 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
14041 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
14042 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
14043 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
14044 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
14045 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
14046 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
14047 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
14048
14049 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
14050 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
14051 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
14052 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
14053 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
14054 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
14055 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
14056 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
14057
14058 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
14059 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
14060 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
14061 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
14062 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
14063 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
14064 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
14065 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
14066
14067 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
14068 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
14069 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
14070 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
14071 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
14072 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
14073 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
14074 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
14075
14076 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
14077 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
14078 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
14079 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
14080 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
14081 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
14082 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
14083 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
14084
14085 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
14086 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
14087 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
14088 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
14089 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14090 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
14091 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14092 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
14093
14094 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14095 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
14096 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14097 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
14098 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14099 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
14100 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14101 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
14102
14103 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14104 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
14105 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14106 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
14107 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14108 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
14109 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14110 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
14111
14112 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14113 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
14114 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14115 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
14116 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14117 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
14118 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14119 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
14120
14121 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14122 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
14123 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14124 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
14125 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14126 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
14127 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14128 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
14129
14130 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14131 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
14132 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14133 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
14134 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14135 BB = EmitAtomicBinary(MI, BB, 4, 0);
14136 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14137 BB = EmitAtomicBinary(MI, BB, 8, 0);
14138 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14139 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14140 (Subtarget.hasPartwordAtomics() &&
14141 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14142 (Subtarget.hasPartwordAtomics() &&
14143 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14144 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14145
14146 auto LoadMnemonic = PPC::LDARX;
14147 auto StoreMnemonic = PPC::STDCX;
14148 switch (MI.getOpcode()) {
14149 default:
14150 llvm_unreachable("Compare and swap of unknown size");
14151 case PPC::ATOMIC_CMP_SWAP_I8:
14152 LoadMnemonic = PPC::LBARX;
14153 StoreMnemonic = PPC::STBCX;
14154 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14155 break;
14156 case PPC::ATOMIC_CMP_SWAP_I16:
14157 LoadMnemonic = PPC::LHARX;
14158 StoreMnemonic = PPC::STHCX;
14159 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14160 break;
14161 case PPC::ATOMIC_CMP_SWAP_I32:
14162 LoadMnemonic = PPC::LWARX;
14163 StoreMnemonic = PPC::STWCX;
14164 break;
14165 case PPC::ATOMIC_CMP_SWAP_I64:
14166 LoadMnemonic = PPC::LDARX;
14167 StoreMnemonic = PPC::STDCX;
14168 break;
14169 }
14170 MachineRegisterInfo &RegInfo = F->getRegInfo();
14171 Register dest = MI.getOperand(0).getReg();
14172 Register ptrA = MI.getOperand(1).getReg();
14173 Register ptrB = MI.getOperand(2).getReg();
14174 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14175 Register oldval = MI.getOperand(3).getReg();
14176 Register newval = MI.getOperand(4).getReg();
14177 DebugLoc dl = MI.getDebugLoc();
14178
14179 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14180 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14181 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14182 F->insert(It, loop1MBB);
14183 F->insert(It, loop2MBB);
14184 F->insert(It, exitMBB);
14185 exitMBB->splice(exitMBB->begin(), BB,
14186 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14188
14189 // thisMBB:
14190 // ...
14191 // fallthrough --> loopMBB
14192 BB->addSuccessor(loop1MBB);
14193
14194 // loop1MBB:
14195 // l[bhwd]arx dest, ptr
14196 // cmp[wd] dest, oldval
14197 // bne- exitBB
14198 // loop2MBB:
14199 // st[bhwd]cx. newval, ptr
14200 // bne- loopMBB
14201 // b exitBB
14202 // exitBB:
14203 BB = loop1MBB;
14204 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14205 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14206 .addReg(dest)
14207 .addReg(oldval);
14208 BuildMI(BB, dl, TII->get(PPC::BCC))
14210 .addReg(CrReg)
14211 .addMBB(exitMBB);
14212 BB->addSuccessor(loop2MBB);
14213 BB->addSuccessor(exitMBB);
14214
14215 BB = loop2MBB;
14216 BuildMI(BB, dl, TII->get(StoreMnemonic))
14217 .addReg(newval)
14218 .addReg(ptrA)
14219 .addReg(ptrB);
14220 BuildMI(BB, dl, TII->get(PPC::BCC))
14222 .addReg(PPC::CR0)
14223 .addMBB(loop1MBB);
14224 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14225 BB->addSuccessor(loop1MBB);
14226 BB->addSuccessor(exitMBB);
14227
14228 // exitMBB:
14229 // ...
14230 BB = exitMBB;
14231 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14232 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14233 // We must use 64-bit registers for addresses when targeting 64-bit,
14234 // since we're actually doing arithmetic on them. Other registers
14235 // can be 32-bit.
14236 bool is64bit = Subtarget.isPPC64();
14237 bool isLittleEndian = Subtarget.isLittleEndian();
14238 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14239
14240 Register dest = MI.getOperand(0).getReg();
14241 Register ptrA = MI.getOperand(1).getReg();
14242 Register ptrB = MI.getOperand(2).getReg();
14243 Register oldval = MI.getOperand(3).getReg();
14244 Register newval = MI.getOperand(4).getReg();
14245 DebugLoc dl = MI.getDebugLoc();
14246
14247 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14248 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14249 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14250 F->insert(It, loop1MBB);
14251 F->insert(It, loop2MBB);
14252 F->insert(It, exitMBB);
14253 exitMBB->splice(exitMBB->begin(), BB,
14254 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14256
14257 MachineRegisterInfo &RegInfo = F->getRegInfo();
14258 const TargetRegisterClass *RC =
14259 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14260 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14261
14262 Register PtrReg = RegInfo.createVirtualRegister(RC);
14263 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
14264 Register ShiftReg =
14265 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
14266 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
14267 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
14268 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
14269 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
14270 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
14271 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
14272 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
14273 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
14274 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
14275 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
14276 Register Ptr1Reg;
14277 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
14278 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14279 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14280 // thisMBB:
14281 // ...
14282 // fallthrough --> loopMBB
14283 BB->addSuccessor(loop1MBB);
14284
14285 // The 4-byte load must be aligned, while a char or short may be
14286 // anywhere in the word. Hence all this nasty bookkeeping code.
14287 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14288 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14289 // xori shift, shift1, 24 [16]
14290 // rlwinm ptr, ptr1, 0, 0, 29
14291 // slw newval2, newval, shift
14292 // slw oldval2, oldval,shift
14293 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14294 // slw mask, mask2, shift
14295 // and newval3, newval2, mask
14296 // and oldval3, oldval2, mask
14297 // loop1MBB:
14298 // lwarx tmpDest, ptr
14299 // and tmp, tmpDest, mask
14300 // cmpw tmp, oldval3
14301 // bne- exitBB
14302 // loop2MBB:
14303 // andc tmp2, tmpDest, mask
14304 // or tmp4, tmp2, newval3
14305 // stwcx. tmp4, ptr
14306 // bne- loop1MBB
14307 // b exitBB
14308 // exitBB:
14309 // srw dest, tmpDest, shift
14310 if (ptrA != ZeroReg) {
14311 Ptr1Reg = RegInfo.createVirtualRegister(RC);
14312 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14313 .addReg(ptrA)
14314 .addReg(ptrB);
14315 } else {
14316 Ptr1Reg = ptrB;
14317 }
14318
14319 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
14320 // mode.
14321 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14322 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
14323 .addImm(3)
14324 .addImm(27)
14325 .addImm(is8bit ? 28 : 27);
14326 if (!isLittleEndian)
14327 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14328 .addReg(Shift1Reg)
14329 .addImm(is8bit ? 24 : 16);
14330 if (is64bit)
14331 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14332 .addReg(Ptr1Reg)
14333 .addImm(0)
14334 .addImm(61);
14335 else
14336 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14337 .addReg(Ptr1Reg)
14338 .addImm(0)
14339 .addImm(0)
14340 .addImm(29);
14341 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14342 .addReg(newval)
14343 .addReg(ShiftReg);
14344 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14345 .addReg(oldval)
14346 .addReg(ShiftReg);
14347 if (is8bit)
14348 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14349 else {
14350 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14351 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14352 .addReg(Mask3Reg)
14353 .addImm(65535);
14354 }
14355 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14356 .addReg(Mask2Reg)
14357 .addReg(ShiftReg);
14358 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14359 .addReg(NewVal2Reg)
14360 .addReg(MaskReg);
14361 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14362 .addReg(OldVal2Reg)
14363 .addReg(MaskReg);
14364
14365 BB = loop1MBB;
14366 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14367 .addReg(ZeroReg)
14368 .addReg(PtrReg);
14369 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14370 .addReg(TmpDestReg)
14371 .addReg(MaskReg);
14372 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
14373 .addReg(TmpReg)
14374 .addReg(OldVal3Reg);
14375 BuildMI(BB, dl, TII->get(PPC::BCC))
14377 .addReg(CrReg)
14378 .addMBB(exitMBB);
14379 BB->addSuccessor(loop2MBB);
14380 BB->addSuccessor(exitMBB);
14381
14382 BB = loop2MBB;
14383 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14384 .addReg(TmpDestReg)
14385 .addReg(MaskReg);
14386 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14387 .addReg(Tmp2Reg)
14388 .addReg(NewVal3Reg);
14389 BuildMI(BB, dl, TII->get(PPC::STWCX))
14390 .addReg(Tmp4Reg)
14391 .addReg(ZeroReg)
14392 .addReg(PtrReg);
14393 BuildMI(BB, dl, TII->get(PPC::BCC))
14395 .addReg(PPC::CR0)
14396 .addMBB(loop1MBB);
14397 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14398 BB->addSuccessor(loop1MBB);
14399 BB->addSuccessor(exitMBB);
14400
14401 // exitMBB:
14402 // ...
14403 BB = exitMBB;
14404 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14405 .addReg(TmpReg)
14406 .addReg(ShiftReg);
14407 } else if (MI.getOpcode() == PPC::FADDrtz) {
14408 // This pseudo performs an FADD with rounding mode temporarily forced
14409 // to round-to-zero. We emit this via custom inserter since the FPSCR
14410 // is not modeled at the SelectionDAG level.
14411 Register Dest = MI.getOperand(0).getReg();
14412 Register Src1 = MI.getOperand(1).getReg();
14413 Register Src2 = MI.getOperand(2).getReg();
14414 DebugLoc dl = MI.getDebugLoc();
14415
14416 MachineRegisterInfo &RegInfo = F->getRegInfo();
14417 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14418
14419 // Save FPSCR value.
14420 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14421
14422 // Set rounding mode to round-to-zero.
14423 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14424 .addImm(31)
14426
14427 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14428 .addImm(30)
14430
14431 // Perform addition.
14432 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14433 .addReg(Src1)
14434 .addReg(Src2);
14435 if (MI.getFlag(MachineInstr::NoFPExcept))
14437
14438 // Restore FPSCR value.
14439 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14440 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14441 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14442 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14443 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14444 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14445 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14446 ? PPC::ANDI8_rec
14447 : PPC::ANDI_rec;
14448 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14449 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14450
14451 MachineRegisterInfo &RegInfo = F->getRegInfo();
14452 Register Dest = RegInfo.createVirtualRegister(
14453 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14454
14455 DebugLoc Dl = MI.getDebugLoc();
14456 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14457 .addReg(MI.getOperand(1).getReg())
14458 .addImm(1);
14459 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14460 MI.getOperand(0).getReg())
14461 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14462 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14463 DebugLoc Dl = MI.getDebugLoc();
14464 MachineRegisterInfo &RegInfo = F->getRegInfo();
14465 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14466 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14467 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14468 MI.getOperand(0).getReg())
14469 .addReg(CRReg);
14470 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14471 DebugLoc Dl = MI.getDebugLoc();
14472 unsigned Imm = MI.getOperand(1).getImm();
14473 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14474 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14475 MI.getOperand(0).getReg())
14476 .addReg(PPC::CR0EQ);
14477 } else if (MI.getOpcode() == PPC::SETRNDi) {
14478 DebugLoc dl = MI.getDebugLoc();
14479 Register OldFPSCRReg = MI.getOperand(0).getReg();
14480
14481 // Save FPSCR value.
14482 if (MRI.use_empty(OldFPSCRReg))
14483 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14484 else
14485 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14486
14487 // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
14488 // the following settings:
14489 // 00 Round to nearest
14490 // 01 Round to 0
14491 // 10 Round to +inf
14492 // 11 Round to -inf
14493
14494 // When the operand is immediate, using the two least significant bits of
14495 // the immediate to set the bits 62:63 of FPSCR.
14496 unsigned Mode = MI.getOperand(1).getImm();
14497 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14498 .addImm(31)
14500
14501 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14502 .addImm(30)
14504 } else if (MI.getOpcode() == PPC::SETRND) {
14505 DebugLoc dl = MI.getDebugLoc();
14506
14507 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14508 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14509 // If the target doesn't have DirectMove, we should use stack to do the
14510 // conversion, because the target doesn't have the instructions like mtvsrd
14511 // or mfvsrd to do this conversion directly.
14512 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14513 if (Subtarget.hasDirectMove()) {
14514 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14515 .addReg(SrcReg);
14516 } else {
14517 // Use stack to do the register copy.
14518 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14519 MachineRegisterInfo &RegInfo = F->getRegInfo();
14520 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14521 if (RC == &PPC::F8RCRegClass) {
14522 // Copy register from F8RCRegClass to G8RCRegclass.
14523 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14524 "Unsupported RegClass.");
14525
14526 StoreOp = PPC::STFD;
14527 LoadOp = PPC::LD;
14528 } else {
14529 // Copy register from G8RCRegClass to F8RCRegclass.
14530 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14531 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14532 "Unsupported RegClass.");
14533 }
14534
14535 MachineFrameInfo &MFI = F->getFrameInfo();
14536 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14537
14538 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14539 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14541 MFI.getObjectAlign(FrameIdx));
14542
14543 // Store the SrcReg into the stack.
14544 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14545 .addReg(SrcReg)
14546 .addImm(0)
14547 .addFrameIndex(FrameIdx)
14548 .addMemOperand(MMOStore);
14549
14550 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14551 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14553 MFI.getObjectAlign(FrameIdx));
14554
14555 // Load from the stack where SrcReg is stored, and save to DestReg,
14556 // so we have done the RegClass conversion from RegClass::SrcReg to
14557 // RegClass::DestReg.
14558 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14559 .addImm(0)
14560 .addFrameIndex(FrameIdx)
14561 .addMemOperand(MMOLoad);
14562 }
14563 };
14564
14565 Register OldFPSCRReg = MI.getOperand(0).getReg();
14566
14567 // Save FPSCR value.
14568 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14569
14570 // When the operand is gprc register, use two least significant bits of the
14571 // register and mtfsf instruction to set the bits 62:63 of FPSCR.
14572 //
14573 // copy OldFPSCRTmpReg, OldFPSCRReg
14574 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14575 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14576 // copy NewFPSCRReg, NewFPSCRTmpReg
14577 // mtfsf 255, NewFPSCRReg
14578 MachineOperand SrcOp = MI.getOperand(1);
14579 MachineRegisterInfo &RegInfo = F->getRegInfo();
14580 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14581
14582 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14583
14584 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14585 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14586
14587 // The first operand of INSERT_SUBREG should be a register which has
14588 // subregisters, we only care about its RegClass, so we should use an
14589 // IMPLICIT_DEF register.
14590 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14591 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14592 .addReg(ImDefReg)
14593 .add(SrcOp)
14594 .addImm(1);
14595
14596 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14597 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14598 .addReg(OldFPSCRTmpReg)
14599 .addReg(ExtSrcReg)
14600 .addImm(0)
14601 .addImm(62);
14602
14603 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14604 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14605
14606 // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
14607 // bits of FPSCR.
14608 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14609 .addImm(255)
14610 .addReg(NewFPSCRReg)
14611 .addImm(0)
14612 .addImm(0);
14613 } else if (MI.getOpcode() == PPC::SETFLM) {
14614 DebugLoc Dl = MI.getDebugLoc();
14615
14616 // Result of setflm is previous FPSCR content, so we need to save it first.
14617 Register OldFPSCRReg = MI.getOperand(0).getReg();
14618 if (MRI.use_empty(OldFPSCRReg))
14619 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14620 else
14621 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14622
14623 // Put bits in 32:63 to FPSCR.
14624 Register NewFPSCRReg = MI.getOperand(1).getReg();
14625 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14626 .addImm(255)
14627 .addReg(NewFPSCRReg)
14628 .addImm(0)
14629 .addImm(0);
14630 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14631 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14632 return emitProbedAlloca(MI, BB);
14633 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14634 DebugLoc DL = MI.getDebugLoc();
14635 Register Src = MI.getOperand(2).getReg();
14636 Register Lo = MI.getOperand(0).getReg();
14637 Register Hi = MI.getOperand(1).getReg();
14638 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14639 .addDef(Lo)
14640 .addUse(Src, {}, PPC::sub_gp8_x1);
14641 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14642 .addDef(Hi)
14643 .addUse(Src, {}, PPC::sub_gp8_x0);
14644 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14645 MI.getOpcode() == PPC::STQX_PSEUDO) {
14646 DebugLoc DL = MI.getDebugLoc();
14647 // Ptr is used as the ptr_rc_no_r0 part
14648 // of LQ/STQ's memory operand and adding result of RA and RB,
14649 // so it has to be g8rc_and_g8rc_nox0.
14650 Register Ptr =
14651 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14652 Register Val = MI.getOperand(0).getReg();
14653 Register RA = MI.getOperand(1).getReg();
14654 Register RB = MI.getOperand(2).getReg();
14655 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14656 BuildMI(*BB, MI, DL,
14657 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14658 : TII->get(PPC::STQ))
14659 .addReg(Val, getDefRegState(MI.getOpcode() == PPC::LQX_PSEUDO))
14660 .addImm(0)
14661 .addReg(Ptr);
14662 } else if (MI.getOpcode() == PPC::LWAT_PSEUDO ||
14663 MI.getOpcode() == PPC::LDAT_PSEUDO) {
14664 DebugLoc DL = MI.getDebugLoc();
14665 Register DstReg = MI.getOperand(0).getReg();
14666 Register PtrReg = MI.getOperand(1).getReg();
14667 Register ValReg = MI.getOperand(2).getReg();
14668 unsigned FC = MI.getOperand(3).getImm();
14669 bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO;
14670 Register Val64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14671 if (IsLwat)
14672 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::SUBREG_TO_REG), Val64)
14673 .addReg(ValReg)
14674 .addImm(PPC::sub_32);
14675 else
14676 Val64 = ValReg;
14677
14678 Register G8rPair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14679 Register UndefG8r = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14680 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), UndefG8r);
14681 BuildMI(*BB, MI, DL, TII->get(PPC::REG_SEQUENCE), G8rPair)
14682 .addReg(UndefG8r)
14683 .addImm(PPC::sub_gp8_x0)
14684 .addReg(Val64)
14685 .addImm(PPC::sub_gp8_x1);
14686
14687 Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14688 BuildMI(*BB, MI, DL, TII->get(IsLwat ? PPC::LWAT : PPC::LDAT), PairResult)
14689 .addReg(G8rPair)
14690 .addReg(PtrReg)
14691 .addImm(FC);
14692 Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14693 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
14694 .addReg(PairResult, {}, PPC::sub_gp8_x0);
14695 if (IsLwat)
14696 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14697 .addReg(Result64, {}, PPC::sub_32);
14698 else
14699 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14700 .addReg(Result64);
14701 } else if (MI.getOpcode() == PPC::LWAT_COND_PSEUDO ||
14702 MI.getOpcode() == PPC::LDAT_COND_PSEUDO) {
14703 DebugLoc DL = MI.getDebugLoc();
14704 Register DstReg = MI.getOperand(0).getReg();
14705 Register PtrReg = MI.getOperand(1).getReg();
14706 unsigned FC = MI.getOperand(2).getImm();
14707 bool IsLwat_Cond = MI.getOpcode() == PPC::LWAT_COND_PSEUDO;
14708
14709 Register Pair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14710 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Pair);
14711
14712 Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14713 BuildMI(*BB, MI, DL, TII->get(IsLwat_Cond ? PPC::LWAT : PPC::LDAT),
14714 PairResult)
14715 .addReg(Pair)
14716 .addReg(PtrReg)
14717 .addImm(FC);
14718 Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14719 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
14720 .addReg(PairResult, {}, PPC::sub_gp8_x0);
14721 if (IsLwat_Cond)
14722 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14723 .addReg(Result64, {}, PPC::sub_32);
14724 else
14725 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14726 .addReg(Result64);
14727 } else {
14728 llvm_unreachable("Unexpected instr type to insert");
14729 }
14730
14731 MI.eraseFromParent(); // The pseudo instruction is gone now.
14732 return BB;
14733}
14734
14735//===----------------------------------------------------------------------===//
14736// Target Optimization Hooks
14737//===----------------------------------------------------------------------===//
14738
14739static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14740 // For the estimates, convergence is quadratic, so we essentially double the
14741 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14742 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14743 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
14744 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14745 if (VT.getScalarType() == MVT::f64)
14746 RefinementSteps++;
14747 return RefinementSteps;
14748}
14749
14750SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14751 const DenormalMode &Mode,
14752 SDNodeFlags Flags) const {
14753 // We only have VSX Vector Test for software Square Root.
14754 EVT VT = Op.getValueType();
14755 if (!isTypeLegal(MVT::i1) ||
14756 (VT != MVT::f64 &&
14757 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14758 return TargetLowering::getSqrtInputTest(Op, DAG, Mode, Flags);
14759
14760 SDLoc DL(Op);
14761 // The output register of FTSQRT is CR field.
14762 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op, Flags);
14763 // ftsqrt BF,FRB
14764 // Let e_b be the unbiased exponent of the double-precision
14765 // floating-point operand in register FRB.
14766 // fe_flag is set to 1 if either of the following conditions occurs.
14767 // - The double-precision floating-point operand in register FRB is a zero,
14768 // a NaN, or an infinity, or a negative value.
14769 // - e_b is less than or equal to -970.
14770 // Otherwise fe_flag is set to 0.
14771 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14772 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14773 // exponent is less than -970)
14774 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14775 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14776 FTSQRT, SRIdxVal),
14777 0);
14778}
14779
14780SDValue
14781PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14782 SelectionDAG &DAG) const {
14783 // We only have VSX Vector Square Root.
14784 EVT VT = Op.getValueType();
14785 if (VT != MVT::f64 &&
14786 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14788
14789 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14790}
14791
14792SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14793 int Enabled, int &RefinementSteps,
14794 bool &UseOneConstNR,
14795 bool Reciprocal) const {
14796 EVT VT = Operand.getValueType();
14797 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14798 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14799 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14800 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14801 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14802 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14803
14804 // The Newton-Raphson computation with a single constant does not provide
14805 // enough accuracy on some CPUs.
14806 UseOneConstNR = !Subtarget.needsTwoConstNR();
14807 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14808 }
14809 return SDValue();
14810}
14811
14812SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14813 int Enabled,
14814 int &RefinementSteps) const {
14815 EVT VT = Operand.getValueType();
14816 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14817 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14818 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14819 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14820 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14821 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14822 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14823 }
14824 return SDValue();
14825}
14826
14828 // Note: This functionality is used only when arcp is enabled, and
14829 // on cores with reciprocal estimates (which are used when arcp is
14830 // enabled for division), this functionality is redundant with the default
14831 // combiner logic (once the division -> reciprocal/multiply transformation
14832 // has taken place). As a result, this matters more for older cores than for
14833 // newer ones.
14834
14835 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14836 // reciprocal if there are two or more FDIVs (for embedded cores with only
14837 // one FP pipeline) for three or more FDIVs (for generic OOO cores).
14838 switch (Subtarget.getCPUDirective()) {
14839 default:
14840 return 3;
14841 case PPC::DIR_440:
14842 case PPC::DIR_A2:
14843 case PPC::DIR_E500:
14844 case PPC::DIR_E500mc:
14845 case PPC::DIR_E5500:
14846 return 2;
14847 }
14848}
14849
14850// isConsecutiveLSLoc needs to work even if all adds have not yet been
14851// collapsed, and so we need to look through chains of them.
14853 int64_t& Offset, SelectionDAG &DAG) {
14854 if (DAG.isBaseWithConstantOffset(Loc)) {
14855 Base = Loc.getOperand(0);
14856 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14857
14858 // The base might itself be a base plus an offset, and if so, accumulate
14859 // that as well.
14860 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14861 }
14862}
14863
14865 unsigned Bytes, int Dist,
14866 SelectionDAG &DAG) {
14867 if (VT.getSizeInBits() / 8 != Bytes)
14868 return false;
14869
14870 SDValue BaseLoc = Base->getBasePtr();
14871 if (Loc.getOpcode() == ISD::FrameIndex) {
14872 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14873 return false;
14875 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
14876 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14877 int FS = MFI.getObjectSize(FI);
14878 int BFS = MFI.getObjectSize(BFI);
14879 if (FS != BFS || FS != (int)Bytes) return false;
14880 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14881 }
14882
14883 SDValue Base1 = Loc, Base2 = BaseLoc;
14884 int64_t Offset1 = 0, Offset2 = 0;
14885 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14886 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14887 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14888 return true;
14889
14890 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14891 const GlobalValue *GV1 = nullptr;
14892 const GlobalValue *GV2 = nullptr;
14893 Offset1 = 0;
14894 Offset2 = 0;
14895 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14896 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14897 if (isGA1 && isGA2 && GV1 == GV2)
14898 return Offset1 == (Offset2 + Dist*Bytes);
14899 return false;
14900}
14901
14902// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14903// not enforce equality of the chain operands.
14905 unsigned Bytes, int Dist,
14906 SelectionDAG &DAG) {
14908 EVT VT = LS->getMemoryVT();
14909 SDValue Loc = LS->getBasePtr();
14910 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14911 }
14912
14913 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14914 EVT VT;
14915 switch (N->getConstantOperandVal(1)) {
14916 default: return false;
14917 case Intrinsic::ppc_altivec_lvx:
14918 case Intrinsic::ppc_altivec_lvxl:
14919 case Intrinsic::ppc_vsx_lxvw4x:
14920 case Intrinsic::ppc_vsx_lxvw4x_be:
14921 VT = MVT::v4i32;
14922 break;
14923 case Intrinsic::ppc_vsx_lxvd2x:
14924 case Intrinsic::ppc_vsx_lxvd2x_be:
14925 VT = MVT::v2f64;
14926 break;
14927 case Intrinsic::ppc_altivec_lvebx:
14928 VT = MVT::i8;
14929 break;
14930 case Intrinsic::ppc_altivec_lvehx:
14931 VT = MVT::i16;
14932 break;
14933 case Intrinsic::ppc_altivec_lvewx:
14934 VT = MVT::i32;
14935 break;
14936 }
14937
14938 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14939 }
14940
14941 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14942 EVT VT;
14943 switch (N->getConstantOperandVal(1)) {
14944 default: return false;
14945 case Intrinsic::ppc_altivec_stvx:
14946 case Intrinsic::ppc_altivec_stvxl:
14947 case Intrinsic::ppc_vsx_stxvw4x:
14948 VT = MVT::v4i32;
14949 break;
14950 case Intrinsic::ppc_vsx_stxvd2x:
14951 VT = MVT::v2f64;
14952 break;
14953 case Intrinsic::ppc_vsx_stxvw4x_be:
14954 VT = MVT::v4i32;
14955 break;
14956 case Intrinsic::ppc_vsx_stxvd2x_be:
14957 VT = MVT::v2f64;
14958 break;
14959 case Intrinsic::ppc_altivec_stvebx:
14960 VT = MVT::i8;
14961 break;
14962 case Intrinsic::ppc_altivec_stvehx:
14963 VT = MVT::i16;
14964 break;
14965 case Intrinsic::ppc_altivec_stvewx:
14966 VT = MVT::i32;
14967 break;
14968 }
14969
14970 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14971 }
14972
14973 return false;
14974}
14975
14976// Return true is there is a nearyby consecutive load to the one provided
14977// (regardless of alignment). We search up and down the chain, looking though
14978// token factors and other loads (but nothing else). As a result, a true result
14979// indicates that it is safe to create a new consecutive load adjacent to the
14980// load provided.
14982 SDValue Chain = LD->getChain();
14983 EVT VT = LD->getMemoryVT();
14984
14985 SmallPtrSet<SDNode *, 16> LoadRoots;
14986 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14988
14989 // First, search up the chain, branching to follow all token-factor operands.
14990 // If we find a consecutive load, then we're done, otherwise, record all
14991 // nodes just above the top-level loads and token factors.
14992 while (!Queue.empty()) {
14993 SDNode *ChainNext = Queue.pop_back_val();
14994 if (!Visited.insert(ChainNext).second)
14995 continue;
14996
14997 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14998 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14999 return true;
15000
15001 if (!Visited.count(ChainLD->getChain().getNode()))
15002 Queue.push_back(ChainLD->getChain().getNode());
15003 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
15004 for (const SDUse &O : ChainNext->ops())
15005 if (!Visited.count(O.getNode()))
15006 Queue.push_back(O.getNode());
15007 } else
15008 LoadRoots.insert(ChainNext);
15009 }
15010
15011 // Second, search down the chain, starting from the top-level nodes recorded
15012 // in the first phase. These top-level nodes are the nodes just above all
15013 // loads and token factors. Starting with their uses, recursively look though
15014 // all loads (just the chain uses) and token factors to find a consecutive
15015 // load.
15016 Visited.clear();
15017 Queue.clear();
15018
15019 for (SDNode *I : LoadRoots) {
15020 Queue.push_back(I);
15021
15022 while (!Queue.empty()) {
15023 SDNode *LoadRoot = Queue.pop_back_val();
15024 if (!Visited.insert(LoadRoot).second)
15025 continue;
15026
15027 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
15028 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
15029 return true;
15030
15031 for (SDNode *U : LoadRoot->users())
15032 if (((isa<MemSDNode>(U) &&
15033 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
15034 U->getOpcode() == ISD::TokenFactor) &&
15035 !Visited.count(U))
15036 Queue.push_back(U);
15037 }
15038 }
15039
15040 return false;
15041}
15042
15043/// This function is called when we have proved that a SETCC node can be replaced
15044/// by subtraction (and other supporting instructions) so that the result of
15045/// comparison is kept in a GPR instead of CR. This function is purely for
15046/// codegen purposes and has some flags to guide the codegen process.
15047static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
15048 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
15049 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15050
15051 // Zero extend the operands to the largest legal integer. Originally, they
15052 // must be of a strictly smaller size.
15053 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
15054 DAG.getConstant(Size, DL, MVT::i32));
15055 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
15056 DAG.getConstant(Size, DL, MVT::i32));
15057
15058 // Swap if needed. Depends on the condition code.
15059 if (Swap)
15060 std::swap(Op0, Op1);
15061
15062 // Subtract extended integers.
15063 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
15064
15065 // Move the sign bit to the least significant position and zero out the rest.
15066 // Now the least significant bit carries the result of original comparison.
15067 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
15068 DAG.getConstant(Size - 1, DL, MVT::i32));
15069 auto Final = Shifted;
15070
15071 // Complement the result if needed. Based on the condition code.
15072 if (Complement)
15073 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
15074 DAG.getConstant(1, DL, MVT::i64));
15075
15076 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
15077}
15078
15079SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
15080 DAGCombinerInfo &DCI) const {
15081 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15082
15083 SelectionDAG &DAG = DCI.DAG;
15084 SDLoc DL(N);
15085
15086 // Size of integers being compared has a critical role in the following
15087 // analysis, so we prefer to do this when all types are legal.
15088 if (!DCI.isAfterLegalizeDAG())
15089 return SDValue();
15090
15091 // If all users of SETCC extend its value to a legal integer type
15092 // then we replace SETCC with a subtraction
15093 for (const SDNode *U : N->users())
15094 if (U->getOpcode() != ISD::ZERO_EXTEND)
15095 return SDValue();
15096
15097 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15098 auto OpSize = N->getOperand(0).getValueSizeInBits();
15099
15101
15102 if (OpSize < Size) {
15103 switch (CC) {
15104 default: break;
15105 case ISD::SETULT:
15106 return generateEquivalentSub(N, Size, false, false, DL, DAG);
15107 case ISD::SETULE:
15108 return generateEquivalentSub(N, Size, true, true, DL, DAG);
15109 case ISD::SETUGT:
15110 return generateEquivalentSub(N, Size, false, true, DL, DAG);
15111 case ISD::SETUGE:
15112 return generateEquivalentSub(N, Size, true, false, DL, DAG);
15113 }
15114 }
15115
15116 return SDValue();
15117}
15118
15119SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
15120 DAGCombinerInfo &DCI) const {
15121 SelectionDAG &DAG = DCI.DAG;
15122 SDLoc dl(N);
15123
15124 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
15125 // If we're tracking CR bits, we need to be careful that we don't have:
15126 // trunc(binary-ops(zext(x), zext(y)))
15127 // or
15128 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
15129 // such that we're unnecessarily moving things into GPRs when it would be
15130 // better to keep them in CR bits.
15131
15132 // Note that trunc here can be an actual i1 trunc, or can be the effective
15133 // truncation that comes from a setcc or select_cc.
15134 if (N->getOpcode() == ISD::TRUNCATE &&
15135 N->getValueType(0) != MVT::i1)
15136 return SDValue();
15137
15138 if (N->getOperand(0).getValueType() != MVT::i32 &&
15139 N->getOperand(0).getValueType() != MVT::i64)
15140 return SDValue();
15141
15142 if (N->getOpcode() == ISD::SETCC ||
15143 N->getOpcode() == ISD::SELECT_CC) {
15144 // If we're looking at a comparison, then we need to make sure that the
15145 // high bits (all except for the first) don't matter the result.
15146 ISD::CondCode CC =
15147 cast<CondCodeSDNode>(N->getOperand(
15148 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15149 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
15150
15151 if (ISD::isSignedIntSetCC(CC)) {
15152 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
15153 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
15154 return SDValue();
15155 } else if (ISD::isUnsignedIntSetCC(CC)) {
15156 if (!DAG.MaskedValueIsZero(N->getOperand(0),
15157 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
15158 !DAG.MaskedValueIsZero(N->getOperand(1),
15159 APInt::getHighBitsSet(OpBits, OpBits-1)))
15160 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15161 : SDValue());
15162 } else {
15163 // This is neither a signed nor an unsigned comparison, just make sure
15164 // that the high bits are equal.
15165 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
15166 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
15167
15168 // We don't really care about what is known about the first bit (if
15169 // anything), so pretend that it is known zero for both to ensure they can
15170 // be compared as constants.
15171 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
15172 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
15173
15174 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15175 Op1Known.getConstant() != Op2Known.getConstant())
15176 return SDValue();
15177 }
15178 }
15179
15180 // We now know that the higher-order bits are irrelevant, we just need to
15181 // make sure that all of the intermediate operations are bit operations, and
15182 // all inputs are extensions.
15183 if (N->getOperand(0).getOpcode() != ISD::AND &&
15184 N->getOperand(0).getOpcode() != ISD::OR &&
15185 N->getOperand(0).getOpcode() != ISD::XOR &&
15186 N->getOperand(0).getOpcode() != ISD::SELECT &&
15187 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
15188 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
15189 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
15190 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
15191 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
15192 return SDValue();
15193
15194 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15195 N->getOperand(1).getOpcode() != ISD::AND &&
15196 N->getOperand(1).getOpcode() != ISD::OR &&
15197 N->getOperand(1).getOpcode() != ISD::XOR &&
15198 N->getOperand(1).getOpcode() != ISD::SELECT &&
15199 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
15200 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
15201 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
15202 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
15203 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
15204 return SDValue();
15205
15207 SmallVector<SDValue, 8> BinOps, PromOps;
15208 SmallPtrSet<SDNode *, 16> Visited;
15209
15210 for (unsigned i = 0; i < 2; ++i) {
15211 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15212 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15213 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15214 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15215 isa<ConstantSDNode>(N->getOperand(i)))
15216 Inputs.push_back(N->getOperand(i));
15217 else
15218 BinOps.push_back(N->getOperand(i));
15219
15220 if (N->getOpcode() == ISD::TRUNCATE)
15221 break;
15222 }
15223
15224 // Visit all inputs, collect all binary operations (and, or, xor and
15225 // select) that are all fed by extensions.
15226 while (!BinOps.empty()) {
15227 SDValue BinOp = BinOps.pop_back_val();
15228
15229 if (!Visited.insert(BinOp.getNode()).second)
15230 continue;
15231
15232 PromOps.push_back(BinOp);
15233
15234 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15235 // The condition of the select is not promoted.
15236 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15237 continue;
15238 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15239 continue;
15240
15241 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15242 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15243 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15244 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15245 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15246 Inputs.push_back(BinOp.getOperand(i));
15247 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15248 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15249 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15250 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15251 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15252 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15253 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15254 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15255 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15256 BinOps.push_back(BinOp.getOperand(i));
15257 } else {
15258 // We have an input that is not an extension or another binary
15259 // operation; we'll abort this transformation.
15260 return SDValue();
15261 }
15262 }
15263 }
15264
15265 // Make sure that this is a self-contained cluster of operations (which
15266 // is not quite the same thing as saying that everything has only one
15267 // use).
15268 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15269 if (isa<ConstantSDNode>(Inputs[i]))
15270 continue;
15271
15272 for (const SDNode *User : Inputs[i].getNode()->users()) {
15273 if (User != N && !Visited.count(User))
15274 return SDValue();
15275
15276 // Make sure that we're not going to promote the non-output-value
15277 // operand(s) or SELECT or SELECT_CC.
15278 // FIXME: Although we could sometimes handle this, and it does occur in
15279 // practice that one of the condition inputs to the select is also one of
15280 // the outputs, we currently can't deal with this.
15281 if (User->getOpcode() == ISD::SELECT) {
15282 if (User->getOperand(0) == Inputs[i])
15283 return SDValue();
15284 } else if (User->getOpcode() == ISD::SELECT_CC) {
15285 if (User->getOperand(0) == Inputs[i] ||
15286 User->getOperand(1) == Inputs[i])
15287 return SDValue();
15288 }
15289 }
15290 }
15291
15292 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15293 for (const SDNode *User : PromOps[i].getNode()->users()) {
15294 if (User != N && !Visited.count(User))
15295 return SDValue();
15296
15297 // Make sure that we're not going to promote the non-output-value
15298 // operand(s) or SELECT or SELECT_CC.
15299 // FIXME: Although we could sometimes handle this, and it does occur in
15300 // practice that one of the condition inputs to the select is also one of
15301 // the outputs, we currently can't deal with this.
15302 if (User->getOpcode() == ISD::SELECT) {
15303 if (User->getOperand(0) == PromOps[i])
15304 return SDValue();
15305 } else if (User->getOpcode() == ISD::SELECT_CC) {
15306 if (User->getOperand(0) == PromOps[i] ||
15307 User->getOperand(1) == PromOps[i])
15308 return SDValue();
15309 }
15310 }
15311 }
15312
15313 // Replace all inputs with the extension operand.
15314 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15315 // Constants may have users outside the cluster of to-be-promoted nodes,
15316 // and so we need to replace those as we do the promotions.
15317 if (isa<ConstantSDNode>(Inputs[i]))
15318 continue;
15319 else
15320 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
15321 }
15322
15323 std::list<HandleSDNode> PromOpHandles;
15324 for (auto &PromOp : PromOps)
15325 PromOpHandles.emplace_back(PromOp);
15326
15327 // Replace all operations (these are all the same, but have a different
15328 // (i1) return type). DAG.getNode will validate that the types of
15329 // a binary operator match, so go through the list in reverse so that
15330 // we've likely promoted both operands first. Any intermediate truncations or
15331 // extensions disappear.
15332 while (!PromOpHandles.empty()) {
15333 SDValue PromOp = PromOpHandles.back().getValue();
15334 PromOpHandles.pop_back();
15335
15336 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15337 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15338 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15339 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15340 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
15341 PromOp.getOperand(0).getValueType() != MVT::i1) {
15342 // The operand is not yet ready (see comment below).
15343 PromOpHandles.emplace_front(PromOp);
15344 continue;
15345 }
15346
15347 SDValue RepValue = PromOp.getOperand(0);
15348 if (isa<ConstantSDNode>(RepValue))
15349 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
15350
15351 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
15352 continue;
15353 }
15354
15355 unsigned C;
15356 switch (PromOp.getOpcode()) {
15357 default: C = 0; break;
15358 case ISD::SELECT: C = 1; break;
15359 case ISD::SELECT_CC: C = 2; break;
15360 }
15361
15362 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15363 PromOp.getOperand(C).getValueType() != MVT::i1) ||
15364 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15365 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
15366 // The to-be-promoted operands of this node have not yet been
15367 // promoted (this should be rare because we're going through the
15368 // list backward, but if one of the operands has several users in
15369 // this cluster of to-be-promoted nodes, it is possible).
15370 PromOpHandles.emplace_front(PromOp);
15371 continue;
15372 }
15373
15375
15376 // If there are any constant inputs, make sure they're replaced now.
15377 for (unsigned i = 0; i < 2; ++i)
15378 if (isa<ConstantSDNode>(Ops[C+i]))
15379 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
15380
15381 DAG.ReplaceAllUsesOfValueWith(PromOp,
15382 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
15383 }
15384
15385 // Now we're left with the initial truncation itself.
15386 if (N->getOpcode() == ISD::TRUNCATE)
15387 return N->getOperand(0);
15388
15389 // Otherwise, this is a comparison. The operands to be compared have just
15390 // changed type (to i1), but everything else is the same.
15391 return SDValue(N, 0);
15392}
15393
15394SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15395 DAGCombinerInfo &DCI) const {
15396 SelectionDAG &DAG = DCI.DAG;
15397 SDLoc dl(N);
15398
15399 // If we're tracking CR bits, we need to be careful that we don't have:
15400 // zext(binary-ops(trunc(x), trunc(y)))
15401 // or
15402 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
15403 // such that we're unnecessarily moving things into CR bits that can more
15404 // efficiently stay in GPRs. Note that if we're not certain that the high
15405 // bits are set as required by the final extension, we still may need to do
15406 // some masking to get the proper behavior.
15407
15408 // This same functionality is important on PPC64 when dealing with
15409 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15410 // the return values of functions. Because it is so similar, it is handled
15411 // here as well.
15412
15413 if (N->getValueType(0) != MVT::i32 &&
15414 N->getValueType(0) != MVT::i64)
15415 return SDValue();
15416
15417 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15418 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15419 return SDValue();
15420
15421 if (N->getOperand(0).getOpcode() != ISD::AND &&
15422 N->getOperand(0).getOpcode() != ISD::OR &&
15423 N->getOperand(0).getOpcode() != ISD::XOR &&
15424 N->getOperand(0).getOpcode() != ISD::SELECT &&
15425 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
15426 return SDValue();
15427
15429 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
15430 SmallPtrSet<SDNode *, 16> Visited;
15431
15432 // Visit all inputs, collect all binary operations (and, or, xor and
15433 // select) that are all fed by truncations.
15434 while (!BinOps.empty()) {
15435 SDValue BinOp = BinOps.pop_back_val();
15436
15437 if (!Visited.insert(BinOp.getNode()).second)
15438 continue;
15439
15440 PromOps.push_back(BinOp);
15441
15442 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15443 // The condition of the select is not promoted.
15444 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15445 continue;
15446 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15447 continue;
15448
15449 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15450 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15451 Inputs.push_back(BinOp.getOperand(i));
15452 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15453 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15454 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15455 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15456 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15457 BinOps.push_back(BinOp.getOperand(i));
15458 } else {
15459 // We have an input that is not a truncation or another binary
15460 // operation; we'll abort this transformation.
15461 return SDValue();
15462 }
15463 }
15464 }
15465
15466 // The operands of a select that must be truncated when the select is
15467 // promoted because the operand is actually part of the to-be-promoted set.
15468 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15469
15470 // Make sure that this is a self-contained cluster of operations (which
15471 // is not quite the same thing as saying that everything has only one
15472 // use).
15473 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15474 if (isa<ConstantSDNode>(Inputs[i]))
15475 continue;
15476
15477 for (SDNode *User : Inputs[i].getNode()->users()) {
15478 if (User != N && !Visited.count(User))
15479 return SDValue();
15480
15481 // If we're going to promote the non-output-value operand(s) or SELECT or
15482 // SELECT_CC, record them for truncation.
15483 if (User->getOpcode() == ISD::SELECT) {
15484 if (User->getOperand(0) == Inputs[i])
15485 SelectTruncOp[0].insert(std::make_pair(User,
15486 User->getOperand(0).getValueType()));
15487 } else if (User->getOpcode() == ISD::SELECT_CC) {
15488 if (User->getOperand(0) == Inputs[i])
15489 SelectTruncOp[0].insert(std::make_pair(User,
15490 User->getOperand(0).getValueType()));
15491 if (User->getOperand(1) == Inputs[i])
15492 SelectTruncOp[1].insert(std::make_pair(User,
15493 User->getOperand(1).getValueType()));
15494 }
15495 }
15496 }
15497
15498 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15499 for (SDNode *User : PromOps[i].getNode()->users()) {
15500 if (User != N && !Visited.count(User))
15501 return SDValue();
15502
15503 // If we're going to promote the non-output-value operand(s) or SELECT or
15504 // SELECT_CC, record them for truncation.
15505 if (User->getOpcode() == ISD::SELECT) {
15506 if (User->getOperand(0) == PromOps[i])
15507 SelectTruncOp[0].insert(std::make_pair(User,
15508 User->getOperand(0).getValueType()));
15509 } else if (User->getOpcode() == ISD::SELECT_CC) {
15510 if (User->getOperand(0) == PromOps[i])
15511 SelectTruncOp[0].insert(std::make_pair(User,
15512 User->getOperand(0).getValueType()));
15513 if (User->getOperand(1) == PromOps[i])
15514 SelectTruncOp[1].insert(std::make_pair(User,
15515 User->getOperand(1).getValueType()));
15516 }
15517 }
15518 }
15519
15520 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
15521 bool ReallyNeedsExt = false;
15522 if (N->getOpcode() != ISD::ANY_EXTEND) {
15523 // If all of the inputs are not already sign/zero extended, then
15524 // we'll still need to do that at the end.
15525 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15526 if (isa<ConstantSDNode>(Inputs[i]))
15527 continue;
15528
15529 unsigned OpBits =
15530 Inputs[i].getOperand(0).getValueSizeInBits();
15531 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15532
15533 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15534 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
15535 APInt::getHighBitsSet(OpBits,
15536 OpBits-PromBits))) ||
15537 (N->getOpcode() == ISD::SIGN_EXTEND &&
15538 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
15539 (OpBits-(PromBits-1)))) {
15540 ReallyNeedsExt = true;
15541 break;
15542 }
15543 }
15544 }
15545
15546 // Convert PromOps to handles before doing any RAUW operations, as these
15547 // may CSE with existing nodes, deleting the originals.
15548 std::list<HandleSDNode> PromOpHandles;
15549 for (auto &PromOp : PromOps)
15550 PromOpHandles.emplace_back(PromOp);
15551
15552 // Replace all inputs, either with the truncation operand, or a
15553 // truncation or extension to the final output type.
15554 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15555 // Constant inputs need to be replaced with the to-be-promoted nodes that
15556 // use them because they might have users outside of the cluster of
15557 // promoted nodes.
15558 if (isa<ConstantSDNode>(Inputs[i]))
15559 continue;
15560
15561 SDValue InSrc = Inputs[i].getOperand(0);
15562 if (Inputs[i].getValueType() == N->getValueType(0))
15563 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
15564 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15565 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15566 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
15567 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15568 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15569 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
15570 else
15571 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15572 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
15573 }
15574
15575 // Replace all operations (these are all the same, but have a different
15576 // (promoted) return type). DAG.getNode will validate that the types of
15577 // a binary operator match, so go through the list in reverse so that
15578 // we've likely promoted both operands first.
15579 while (!PromOpHandles.empty()) {
15580 SDValue PromOp = PromOpHandles.back().getValue();
15581 PromOpHandles.pop_back();
15582
15583 unsigned C;
15584 switch (PromOp.getOpcode()) {
15585 default: C = 0; break;
15586 case ISD::SELECT: C = 1; break;
15587 case ISD::SELECT_CC: C = 2; break;
15588 }
15589
15590 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15591 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
15592 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15593 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
15594 // The to-be-promoted operands of this node have not yet been
15595 // promoted (this should be rare because we're going through the
15596 // list backward, but if one of the operands has several users in
15597 // this cluster of to-be-promoted nodes, it is possible).
15598 PromOpHandles.emplace_front(PromOp);
15599 continue;
15600 }
15601
15602 // For SELECT and SELECT_CC nodes, we do a similar check for any
15603 // to-be-promoted comparison inputs.
15604 if (PromOp.getOpcode() == ISD::SELECT ||
15605 PromOp.getOpcode() == ISD::SELECT_CC) {
15606 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
15607 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
15608 (SelectTruncOp[1].count(PromOp.getNode()) &&
15609 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
15610 PromOpHandles.emplace_front(PromOp);
15611 continue;
15612 }
15613 }
15614
15616
15617 // If this node has constant inputs, then they'll need to be promoted here.
15618 for (unsigned i = 0; i < 2; ++i) {
15619 if (!isa<ConstantSDNode>(Ops[C+i]))
15620 continue;
15621 if (Ops[C+i].getValueType() == N->getValueType(0))
15622 continue;
15623
15624 if (N->getOpcode() == ISD::SIGN_EXTEND)
15625 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15626 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15627 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15628 else
15629 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15630 }
15631
15632 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15633 // truncate them again to the original value type.
15634 if (PromOp.getOpcode() == ISD::SELECT ||
15635 PromOp.getOpcode() == ISD::SELECT_CC) {
15636 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
15637 if (SI0 != SelectTruncOp[0].end())
15638 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
15639 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
15640 if (SI1 != SelectTruncOp[1].end())
15641 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
15642 }
15643
15644 DAG.ReplaceAllUsesOfValueWith(PromOp,
15645 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
15646 }
15647
15648 // Now we're left with the initial extension itself.
15649 if (!ReallyNeedsExt)
15650 return N->getOperand(0);
15651
15652 // To zero extend, just mask off everything except for the first bit (in the
15653 // i1 case).
15654 if (N->getOpcode() == ISD::ZERO_EXTEND)
15655 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
15657 N->getValueSizeInBits(0), PromBits),
15658 dl, N->getValueType(0)));
15659
15660 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15661 "Invalid extension type");
15662 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
15663 SDValue ShiftCst =
15664 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
15665 return DAG.getNode(
15666 ISD::SRA, dl, N->getValueType(0),
15667 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
15668 ShiftCst);
15669}
15670
// Check whether both operands of an i128 equality comparison can be turned
// into v16i8 loads feeding an AltiVec vcmpequb comparison. An operand
// qualifies if it is an i128 constant, or a single-use, unindexed,
// non-extending, non-volatile i128 load.

  auto isValidForConvert = [](SDValue &Operand) {
    // The operand must feed only this comparison; otherwise changing the
    // load's result type would disturb its other users.
    if (!Operand.hasOneUse())
      return false;

    if (Operand.getValueType() != MVT::i128)
      return false;

    // An i128 constant can simply be bitcast to v16i8 later.
    if (Operand.getOpcode() == ISD::Constant)
      return true;

    // Anything else must be a plain load node.
    auto *LoadNode = dyn_cast<LoadSDNode>(Operand);
    if (!LoadNode)
      return false;

    // If the memory operation is volatile, do not perform any
    // optimization or transformation. Volatile operations must be preserved
    // as written to ensure correct program behavior, so we reject the
    // conversion (return false) to indicate no action.

    if (LoadNode->isVolatile())
      return false;

    // Only combine loads if both use the unindexed addressing mode.
    // PowerPC AltiVec/VMX does not support vector loads or stores with
    // pre/post-increment addressing. Indexed modes may imply implicit
    // pointer updates, which are not compatible with AltiVec vector
    // instructions.
    if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
      return false;

    // Only combine loads if both are non-extending loads
    // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
    // ISD::SEXTLOAD) perform zero or sign extension, which may change the
    // loaded value's semantics and are not compatible with vector loads.
    if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
      return false;

    return true;
  };

  // Both sides of the comparison must be convertible.
  return (isValidForConvert(LHS) && isValidForConvert(RHS));
}
15716
15718 const SDLoc &DL) {
15719
15720 assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15721
15722 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15723 assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
15724 "CC mus be ISD::SETNE or ISD::SETEQ");
15725
15726 auto getV16i8Load = [&](const SDValue &Operand) {
15727 if (Operand.getOpcode() == ISD::Constant)
15728 return DAG.getBitcast(MVT::v16i8, Operand);
15729
15730 assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15731
15732 auto *LoadNode = cast<LoadSDNode>(Operand);
15733 SDValue NewLoad =
15734 DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
15735 LoadNode->getBasePtr(), LoadNode->getMemOperand());
15736 DAG.ReplaceAllUsesOfValueWith(Operand.getValue(1), NewLoad.getValue(1));
15737 return NewLoad;
15738 };
15739
15740 // Following code transforms the DAG
15741 // t0: ch,glue = EntryToken
15742 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15743 // t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15744 // undef:i64
15745 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15746 // t5: i128,ch =
15747 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64 t6: i1 =
15748 // setcc t3, t5, setne:ch
15749 //
15750 // ---->
15751 //
15752 // t0: ch,glue = EntryToken
15753 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15754 // t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15755 // undef:i64
15756 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15757 // t5: v16i8,ch =
15758 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15759 // t6: i32 =
15760 // llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15761 // Constant:i32<2>, t3, t5
15762 // t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15763
15764 // Or transforms the DAG
15765 // t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15766 // t8: i1 =
15767 // setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15768 //
15769 // --->
15770 //
15771 // t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15772 // t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15773 // t7: i32 =
15774 // llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t5, t2
15775
15776 SDValue LHSVec = getV16i8Load(N->getOperand(0));
15777 SDValue RHSVec = getV16i8Load(N->getOperand(1));
15778
15779 SDValue IntrID =
15780 DAG.getConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL, MVT::i32);
15781 SDValue CRSel = DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
15782 SDValue PredResult = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
15783 IntrID, CRSel, LHSVec, RHSVec);
15784 // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15785 // so we need to invert the CC opcode.
15786 return DAG.getSetCC(DL, N->getValueType(0), PredResult,
15787 DAG.getConstant(0, DL, MVT::i32),
15788 CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15789}
15790
15791// Detect whether there is a pattern like (setcc (and X, 1), 0, eq).
15792// If it is , return true; otherwise return false.
15794 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15795
15796 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15797 if (CC != ISD::SETEQ)
15798 return false;
15799
15800 SDValue LHS = N->getOperand(0);
15801 SDValue RHS = N->getOperand(1);
15802
15803 // Check the `SDValue &V` is from `and` with `1`.
15804 auto IsAndWithOne = [](SDValue &V) {
15805 if (V.getOpcode() == ISD::AND) {
15806 for (const SDValue &Op : V->ops())
15807 if (auto *C = dyn_cast<ConstantSDNode>(Op))
15808 if (C->isOne())
15809 return true;
15810 }
15811 return false;
15812 };
15813
15814 // Check whether the SETCC compare with zero.
15815 auto IsCompareWithZero = [](SDValue &V) {
15816 if (auto *C = dyn_cast<ConstantSDNode>(V))
15817 if (C->isZero())
15818 return true;
15819 return false;
15820 };
15821
15822 return (IsAndWithOne(LHS) && IsCompareWithZero(RHS)) ||
15823 (IsAndWithOne(RHS) && IsCompareWithZero(LHS));
15824}
15825
15826// You must check whether the `SDNode* N` can be converted to Xori using
15827// the function `static bool canConvertSETCCToXori(SDNode *N)`
15828// before calling the function; otherwise, it may produce incorrect results.
15830
15831 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15832 SDValue LHS = N->getOperand(0);
15833 SDValue RHS = N->getOperand(1);
15834 SDLoc DL(N);
15835
15836 [[maybe_unused]] ISD::CondCode CC =
15837 cast<CondCodeSDNode>(N->getOperand(2))->get();
15838 assert((CC == ISD::SETEQ) && "CC must be ISD::SETEQ.");
15839 // Rewrite it as XORI (and X, 1), 1.
15840 auto MakeXor1 = [&](SDValue V) {
15841 EVT VT = V.getValueType();
15842 SDValue One = DAG.getConstant(1, DL, VT);
15843 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, V, One);
15844 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Xor);
15845 };
15846
15847 if (LHS.getOpcode() == ISD::AND && RHS.getOpcode() != ISD::AND)
15848 return MakeXor1(LHS);
15849
15850 if (RHS.getOpcode() == ISD::AND && LHS.getOpcode() != ISD::AND)
15851 return MakeXor1(RHS);
15852
15853 llvm_unreachable("Should not reach here.");
15854}
15855
15856SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15857 DAGCombinerInfo &DCI) const {
15858 assert(N->getOpcode() == ISD::SETCC &&
15859 "Should be called with a SETCC node");
15860
15861 // Check if the pattern (setcc (and X, 1), 0, eq) is present.
15862 // If it is, rewrite it as XORI (and X, 1), 1.
15864 return ConvertSETCCToXori(N, DCI.DAG);
15865
15866 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15867 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15868 SDValue LHS = N->getOperand(0);
15869 SDValue RHS = N->getOperand(1);
15870
15871 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15872 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
15873 LHS.hasOneUse())
15874 std::swap(LHS, RHS);
15875
15876 // x == 0-y --> x+y == 0
15877 // x != 0-y --> x+y != 0
15878 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
15879 RHS.hasOneUse()) {
15880 SDLoc DL(N);
15881 SelectionDAG &DAG = DCI.DAG;
15882 EVT VT = N->getValueType(0);
15883 EVT OpVT = LHS.getValueType();
15884 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
15885 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
15886 }
15887
15888 // Optimization: Fold i128 equality/inequality compares of two loads into a
15889 // vectorized compare using vcmpequb.p when Altivec is available.
15890 //
15891 // Rationale:
15892 // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
15893 // On VSX-capable subtargets, we can instead reinterpret the i128 loads
15894 // as v16i8 vectors and use the Altive vcmpequb.p instruction to
15895 // perform a full 128-bit equality check in a single vector compare.
15896 //
15897 // Example Result:
15898 // This transformation replaces memcmp(a, b, 16) with two vector loads
15899 // and one vector compare instruction.
15900
15901 if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
15902 return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N));
15903 }
15904
15905 return DAGCombineTruncBoolExt(N, DCI);
15906}
15907
15908// Is this an extending load from an f32 to an f64?
15909static bool isFPExtLoad(SDValue Op) {
15910 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
15911 return LD->getExtensionType() == ISD::EXTLOAD &&
15912 Op.getValueType() == MVT::f64;
15913 return false;
15914}
15915
15916/// Reduces the number of fp-to-int conversion when building a vector.
15917///
15918/// If this vector is built out of floating to integer conversions,
15919/// transform it to a vector built out of floating point values followed by a
15920/// single floating to integer conversion of the vector.
15921/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15922/// becomes (fptosi (build_vector ($A, $B, ...)))
15923SDValue PPCTargetLowering::
15924combineElementTruncationToVectorTruncation(SDNode *N,
15925 DAGCombinerInfo &DCI) const {
15926 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15927 "Should be called with a BUILD_VECTOR node");
15928
15929 SelectionDAG &DAG = DCI.DAG;
15930 SDLoc dl(N);
15931
15932 SDValue FirstInput = N->getOperand(0);
15933 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15934 "The input operand must be an fp-to-int conversion.");
15935
15936 // This combine happens after legalization so the fp_to_[su]i nodes are
15937 // already converted to PPCSISD nodes.
15938 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
15939 if (FirstConversion == PPCISD::FCTIDZ ||
15940 FirstConversion == PPCISD::FCTIDUZ ||
15941 FirstConversion == PPCISD::FCTIWZ ||
15942 FirstConversion == PPCISD::FCTIWUZ) {
15943 bool IsSplat = true;
15944 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15945 FirstConversion == PPCISD::FCTIWUZ;
15946 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15948 EVT TargetVT = N->getValueType(0);
15949 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15950 SDValue NextOp = N->getOperand(i);
15951 if (NextOp.getOpcode() != PPCISD::MFVSR)
15952 return SDValue();
15953 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15954 if (NextConversion != FirstConversion)
15955 return SDValue();
15956 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15957 // This is not valid if the input was originally double precision. It is
15958 // also not profitable to do unless this is an extending load in which
15959 // case doing this combine will allow us to combine consecutive loads.
15960 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
15961 return SDValue();
15962 if (N->getOperand(i) != FirstInput)
15963 IsSplat = false;
15964 }
15965
15966 // If this is a splat, we leave it as-is since there will be only a single
15967 // fp-to-int conversion followed by a splat of the integer. This is better
15968 // for 32-bit and smaller ints and neutral for 64-bit ints.
15969 if (IsSplat)
15970 return SDValue();
15971
15972 // Now that we know we have the right type of node, get its operands
15973 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15974 SDValue In = N->getOperand(i).getOperand(0);
15975 if (Is32Bit) {
15976 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15977 // here, we know that all inputs are extending loads so this is safe).
15978 if (In.isUndef())
15979 Ops.push_back(DAG.getUNDEF(SrcVT));
15980 else {
15981 SDValue Trunc =
15982 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15983 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15984 Ops.push_back(Trunc);
15985 }
15986 } else
15987 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15988 }
15989
15990 unsigned Opcode;
15991 if (FirstConversion == PPCISD::FCTIDZ ||
15992 FirstConversion == PPCISD::FCTIWZ)
15993 Opcode = ISD::FP_TO_SINT;
15994 else
15995 Opcode = ISD::FP_TO_UINT;
15996
15997 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15998 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15999 return DAG.getNode(Opcode, dl, TargetVT, BV);
16000 }
16001 return SDValue();
16002}
16003
16004// LXVKQ instruction load VSX vector with a special quadword value
16005// based on an immediate value. This helper method returns the details of the
16006// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
16007// to help generate the LXVKQ instruction and the subsequent shift instruction
16008// required to match the original build vector pattern.
16009
16010// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
16011using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
16012
16013static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
16014
16015 // LXVKQ instruction loads the Quadword value:
16016 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
16017 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
16018 static const uint32_t Uim = 16;
16019
16020 // Check for direct LXVKQ match (no shift needed)
16021 if (FullVal == BasePattern)
16022 return std::make_tuple(Uim, uint8_t{0});
16023
16024 // Check if FullValue is 1 (the result of the base pattern >> 127)
16025 if (FullVal == APInt(128, 1))
16026 return std::make_tuple(Uim, uint8_t{127});
16027
16028 return std::nullopt;
16029}
16030
16031/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
16032/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
16033/// LXVKQ instruction load VSX vector with a special quadword value based on an
16034/// immediate value. if UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
16035/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
16036/// This can be used to inline the build vector constants that have the
16037/// following patterns:
16038///
16039/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
16040/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
16041/// MSB pattern can directly loaded using LXVKQ while LSB is loaded using a
16042/// combination of splatting and right shift instructions.
16043
16044SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
16045 SelectionDAG &DAG) const {
16046
16047 assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
16048 "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
16049
16050 // This transformation is only supported if we are loading either a byte,
16051 // halfword, word, or doubleword.
16052 EVT VT = Op.getValueType();
16053 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
16054 VT == MVT::v2i64))
16055 return SDValue();
16056
16057 LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
16058 << VT.getEVTString() << "): ";
16059 Op->dump());
16060
16061 unsigned NumElems = VT.getVectorNumElements();
16062 unsigned ElemBits = VT.getScalarSizeInBits();
16063
16064 bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
16065
16066 // Check for Non-constant operand in the build vector.
16067 for (const SDValue &Operand : Op.getNode()->op_values()) {
16068 if (!isa<ConstantSDNode>(Operand))
16069 return SDValue();
16070 }
16071
16072 // Assemble build vector operands as a 128-bit register value
16073 // We need to reconstruct what the 128-bit register pattern would be
16074 // that produces this vector when interpreted with the current endianness
16075 APInt FullVal = APInt::getZero(128);
16076
16077 for (unsigned Index = 0; Index < NumElems; ++Index) {
16078 auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
16079
16080 // Get element value as raw bits (zero-extended)
16081 uint64_t ElemValue = C->getZExtValue();
16082
16083 // Mask to element size to ensure we only get the relevant bits
16084 if (ElemBits < 64)
16085 ElemValue &= ((1ULL << ElemBits) - 1);
16086
16087 // Calculate bit position for this element in the 128-bit register
16088 unsigned BitPos =
16089 (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
16090
16091 // Create APInt for the element value and shift it to correct position
16092 APInt ElemAPInt(128, ElemValue);
16093 ElemAPInt <<= BitPos;
16094
16095 // Place the element value at the correct bit position
16096 FullVal |= ElemAPInt;
16097 }
16098
16099 if (FullVal.isZero() || FullVal.isAllOnes())
16100 return SDValue();
16101
16102 if (auto UIMOpt = getPatternInfo(FullVal)) {
16103 const auto &[Uim, ShiftAmount] = *UIMOpt;
16104 SDLoc Dl(Op);
16105
16106 // Generate LXVKQ instruction if the shift amount is zero.
16107 if (ShiftAmount == 0) {
16108 SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
16109 SDValue LxvkqInstr =
16110 SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
16112 << "combineBVLoadsSpecialValue: Instruction Emitted ";
16113 LxvkqInstr.dump());
16114 return LxvkqInstr;
16115 }
16116
16117 assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
16118
16119 // The right shifted pattern can be constructed using a combination of
16120 // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
16121 // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
16122 // value 255.
16123 SDValue ShiftAmountVec =
16124 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
16125 DAG.getTargetConstant(255, Dl, MVT::i32)),
16126 0);
16127 // Generate appropriate right shift instruction
16128 SDValue ShiftVec = SDValue(
16129 DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
16130 0);
16132 << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
16133 ShiftVec.dump());
16134 return ShiftVec;
16135 }
16136 // No patterns matched for build vectors.
16137 return SDValue();
16138}
16139
16140/// Reduce the number of loads when building a vector.
16141///
16142/// Building a vector out of multiple loads can be converted to a load
16143/// of the vector type if the loads are consecutive. If the loads are
16144/// consecutive but in descending order, a shuffle is added at the end
16145/// to reorder the vector.
16147 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16148 "Should be called with a BUILD_VECTOR node");
16149
16150 SDLoc dl(N);
16151
16152 // Return early for non byte-sized type, as they can't be consecutive.
16153 if (!N->getValueType(0).getVectorElementType().isByteSized())
16154 return SDValue();
16155
16156 bool InputsAreConsecutiveLoads = true;
16157 bool InputsAreReverseConsecutive = true;
16158 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
16159 SDValue FirstInput = N->getOperand(0);
16160 bool IsRoundOfExtLoad = false;
16161 LoadSDNode *FirstLoad = nullptr;
16162
16163 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
16164 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
16165 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
16166 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
16167 }
16168 // Not a build vector of (possibly fp_rounded) loads.
16169 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
16170 N->getNumOperands() == 1)
16171 return SDValue();
16172
16173 if (!IsRoundOfExtLoad)
16174 FirstLoad = cast<LoadSDNode>(FirstInput);
16175
16177 InputLoads.push_back(FirstLoad);
16178 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
16179 // If any inputs are fp_round(extload), they all must be.
16180 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
16181 return SDValue();
16182
16183 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
16184 N->getOperand(i);
16185 if (NextInput.getOpcode() != ISD::LOAD)
16186 return SDValue();
16187
16188 SDValue PreviousInput =
16189 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
16190 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
16191 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
16192
16193 // If any inputs are fp_round(extload), they all must be.
16194 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
16195 return SDValue();
16196
16197 // We only care about regular loads. The PPC-specific load intrinsics
16198 // will not lead to a merge opportunity.
16199 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
16200 InputsAreConsecutiveLoads = false;
16201 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
16202 InputsAreReverseConsecutive = false;
16203
16204 // Exit early if the loads are neither consecutive nor reverse consecutive.
16205 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
16206 return SDValue();
16207 InputLoads.push_back(LD2);
16208 }
16209
16210 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
16211 "The loads cannot be both consecutive and reverse consecutive.");
16212
16213 SDValue WideLoad;
16214 SDValue ReturnSDVal;
16215 if (InputsAreConsecutiveLoads) {
16216 assert(FirstLoad && "Input needs to be a LoadSDNode.");
16217 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
16218 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16219 FirstLoad->getAlign());
16220 ReturnSDVal = WideLoad;
16221 } else if (InputsAreReverseConsecutive) {
16222 LoadSDNode *LastLoad = InputLoads.back();
16223 assert(LastLoad && "Input needs to be a LoadSDNode.");
16224 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
16225 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
16226 LastLoad->getAlign());
16228 for (int i = N->getNumOperands() - 1; i >= 0; i--)
16229 Ops.push_back(i);
16230
16231 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
16232 DAG.getUNDEF(N->getValueType(0)), Ops);
16233 } else
16234 return SDValue();
16235
16236 for (auto *LD : InputLoads)
16237 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
16238 return ReturnSDVal;
16239}
16240
16241// This function adds the required vector_shuffle needed to get
16242// the elements of the vector extract in the correct position
16243// as specified by the CorrectElems encoding.
16245 SDValue Input, uint64_t Elems,
16246 uint64_t CorrectElems) {
16247 SDLoc dl(N);
16248
16249 unsigned NumElems = Input.getValueType().getVectorNumElements();
16250 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16251
16252 // Knowing the element indices being extracted from the original
16253 // vector and the order in which they're being inserted, just put
16254 // them at element indices required for the instruction.
16255 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16256 if (DAG.getDataLayout().isLittleEndian())
16257 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16258 else
16259 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16260 CorrectElems = CorrectElems >> 8;
16261 Elems = Elems >> 8;
16262 }
16263
16264 SDValue Shuffle =
16265 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
16266 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
16267
16268 EVT VT = N->getValueType(0);
16269 SDValue Conv = DAG.getBitcast(VT, Shuffle);
16270
16271 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
16272 Input.getValueType().getVectorElementType(),
16274 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
16275 DAG.getValueType(ExtVT));
16276}
16277
16278// Look for build vector patterns where input operands come from sign
16279// extended vector_extract elements of specific indices. If the correct indices
16280// aren't used, add a vector shuffle to fix up the indices and create
16281// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16282// during instruction selection.
16284 // This array encodes the indices that the vector sign extend instructions
16285 // extract from when extending from one type to another for both BE and LE.
16286 // The right nibble of each byte corresponds to the LE incides.
16287 // and the left nibble of each byte corresponds to the BE incides.
16288 // For example: 0x3074B8FC byte->word
16289 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16290 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16291 // For example: 0x000070F8 byte->double word
16292 // For LE: the allowed indices are: 0x0,0x8
16293 // For BE: the allowed indices are: 0x7,0xF
16294 uint64_t TargetElems[] = {
16295 0x3074B8FC, // b->w
16296 0x000070F8, // b->d
16297 0x10325476, // h->w
16298 0x00003074, // h->d
16299 0x00001032, // w->d
16300 };
16301
16302 uint64_t Elems = 0;
16303 int Index;
16304 SDValue Input;
16305
16306 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16307 if (!Op)
16308 return false;
16309 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16310 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16311 return false;
16312
16313 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16314 // of the right width.
16315 SDValue Extract = Op.getOperand(0);
16316 if (Extract.getOpcode() == ISD::ANY_EXTEND)
16317 Extract = Extract.getOperand(0);
16318 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16319 return false;
16320
16322 if (!ExtOp)
16323 return false;
16324
16325 Index = ExtOp->getZExtValue();
16326 if (Input && Input != Extract.getOperand(0))
16327 return false;
16328
16329 if (!Input)
16330 Input = Extract.getOperand(0);
16331
16332 Elems = Elems << 8;
16333 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
16334 Elems |= Index;
16335
16336 return true;
16337 };
16338
16339 // If the build vector operands aren't sign extended vector extracts,
16340 // of the same input vector, then return.
16341 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16342 if (!isSExtOfVecExtract(N->getOperand(i))) {
16343 return SDValue();
16344 }
16345 }
16346
16347 // If the vector extract indices are not correct, add the appropriate
16348 // vector_shuffle.
16349 int TgtElemArrayIdx;
16350 int InputSize = Input.getValueType().getScalarSizeInBits();
16351 int OutputSize = N->getValueType(0).getScalarSizeInBits();
16352 if (InputSize + OutputSize == 40)
16353 TgtElemArrayIdx = 0;
16354 else if (InputSize + OutputSize == 72)
16355 TgtElemArrayIdx = 1;
16356 else if (InputSize + OutputSize == 48)
16357 TgtElemArrayIdx = 2;
16358 else if (InputSize + OutputSize == 80)
16359 TgtElemArrayIdx = 3;
16360 else if (InputSize + OutputSize == 96)
16361 TgtElemArrayIdx = 4;
16362 else
16363 return SDValue();
16364
16365 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16366 CorrectElems = DAG.getDataLayout().isLittleEndian()
16367 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
16368 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
16369 if (Elems != CorrectElems) {
16370 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16371 }
16372
16373 // Regular lowering will catch cases where a shuffle is not needed.
16374 return SDValue();
16375}
16376
16377// Look for the pattern of a load from a narrow width to i128, feeding
16378// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16379// (LXVRZX). This node represents a zero extending load that will be matched
16380// to the Load VSX Vector Rightmost instructions.
16382 SDLoc DL(N);
16383
16384 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16385 if (N->getValueType(0) != MVT::v1i128)
16386 return SDValue();
16387
16388 SDValue Operand = N->getOperand(0);
16389 // Proceed with the transformation if the operand to the BUILD_VECTOR
16390 // is a load instruction.
16391 if (Operand.getOpcode() != ISD::LOAD)
16392 return SDValue();
16393
16394 auto *LD = cast<LoadSDNode>(Operand);
16395 EVT MemoryType = LD->getMemoryVT();
16396
16397 // This transformation is only valid if the we are loading either a byte,
16398 // halfword, word, or doubleword.
16399 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16400 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16401
16402 // Ensure that the load from the narrow width is being zero extended to i128.
16403 if (!ValidLDType ||
16404 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16405 LD->getExtensionType() != ISD::EXTLOAD))
16406 return SDValue();
16407
16408 SDValue LoadOps[] = {
16409 LD->getChain(), LD->getBasePtr(),
16410 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
16411
16412 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
16413 DAG.getVTList(MVT::v1i128, MVT::Other),
16414 LoadOps, MemoryType, LD->getMemOperand());
16415}
16416
16417SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16418 DAGCombinerInfo &DCI) const {
16419 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16420 "Should be called with a BUILD_VECTOR node");
16421
16422 SelectionDAG &DAG = DCI.DAG;
16423 SDLoc dl(N);
16424
16425 if (!Subtarget.hasVSX())
16426 return SDValue();
16427
16428 // The target independent DAG combiner will leave a build_vector of
16429 // float-to-int conversions intact. We can generate MUCH better code for
16430 // a float-to-int conversion of a vector of floats.
16431 SDValue FirstInput = N->getOperand(0);
16432 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16433 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16434 if (Reduced)
16435 return Reduced;
16436 }
16437
16438 // If we're building a vector out of consecutive loads, just load that
16439 // vector type.
16440 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16441 if (Reduced)
16442 return Reduced;
16443
16444 // If we're building a vector out of extended elements from another vector
16445 // we have P9 vector integer extend instructions. The code assumes legal
16446 // input types (i.e. it can't handle things like v4i16) so do not run before
16447 // legalization.
16448 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16449 Reduced = combineBVOfVecSExt(N, DAG);
16450 if (Reduced)
16451 return Reduced;
16452 }
16453
16454 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
16455 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16456 // is a load from <valid narrow width> to i128.
16457 if (Subtarget.isISA3_1()) {
16458 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16459 if (BVOfZLoad)
16460 return BVOfZLoad;
16461 }
16462
16463 if (N->getValueType(0) != MVT::v2f64)
16464 return SDValue();
16465
16466 // Looking for:
16467 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
16468 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16469 FirstInput.getOpcode() != ISD::UINT_TO_FP)
16470 return SDValue();
16471 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
16472 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
16473 return SDValue();
16474 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
16475 return SDValue();
16476
16477 SDValue Ext1 = FirstInput.getOperand(0);
16478 SDValue Ext2 = N->getOperand(1).getOperand(0);
16479 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16481 return SDValue();
16482
16483 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16484 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16485 if (!Ext1Op || !Ext2Op)
16486 return SDValue();
16487 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16488 Ext1.getOperand(0) != Ext2.getOperand(0))
16489 return SDValue();
16490
16491 int FirstElem = Ext1Op->getZExtValue();
16492 int SecondElem = Ext2Op->getZExtValue();
16493 int SubvecIdx;
16494 if (FirstElem == 0 && SecondElem == 1)
16495 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16496 else if (FirstElem == 2 && SecondElem == 3)
16497 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16498 else
16499 return SDValue();
16500
16501 SDValue SrcVec = Ext1.getOperand(0);
16502 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16503 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16504 return DAG.getNode(NodeType, dl, MVT::v2f64,
16505 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16506}
16507
// Combine (sint_to_fp X) / (uint_to_fp X) into direct register-based
// conversions, avoiding the store/load sequence the default lowering uses.
// Returns the replacement node, or an empty SDValue if no combine applies.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  // All transformations below rely on hardware FP and 64-bit conversion
  // instructions.
  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (!Op.getOperand(0).getValueType().isSimple())
    return SDValue();
  // Only integer sources strictly wider than i1 and no wider than i64 are
  // handled.
  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  // On Power9, a sub-word (i8/i16) load feeding the conversion can be done
  // with LXSIZX, which loads directly into a VSR and avoids a GPR->VSR move.
  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    // Width of the load in bytes: 1 for i8, 2 for i16.
    SDValue WidthConst =
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());
    // Keep the new load correctly ordered relative to the original one.
    DAG.makeEquivalentMemoryOrdering(LDN, Ld);

    // For signed conversion, we need to sign-extend the value in the VSR
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
    } else
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
  }


  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value are undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    // Without FPCVT there is no single-precision FCFIDS, so round the f64
    // result down to f32 explicitly.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
16614
16615// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16616// builtins) into loads with swaps.
16618 DAGCombinerInfo &DCI) const {
16619 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16620 // load combines.
16621 if (DCI.isBeforeLegalizeOps())
16622 return SDValue();
16623
16624 SelectionDAG &DAG = DCI.DAG;
16625 SDLoc dl(N);
16626 SDValue Chain;
16627 SDValue Base;
16628 MachineMemOperand *MMO;
16629
16630 switch (N->getOpcode()) {
16631 default:
16632 llvm_unreachable("Unexpected opcode for little endian VSX load");
16633 case ISD::LOAD: {
16635 Chain = LD->getChain();
16636 Base = LD->getBasePtr();
16637 MMO = LD->getMemOperand();
16638 // If the MMO suggests this isn't a load of a full vector, leave
16639 // things alone. For a built-in, we have to make the change for
16640 // correctness, so if there is a size problem that will be a bug.
16641 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16642 return SDValue();
16643 break;
16644 }
16647 Chain = Intrin->getChain();
16648 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16649 // us what we want. Get operand 2 instead.
16650 Base = Intrin->getOperand(2);
16651 MMO = Intrin->getMemOperand();
16652 break;
16653 }
16654 }
16655
16656 MVT VecTy = N->getValueType(0).getSimpleVT();
16657
16658 SDValue LoadOps[] = { Chain, Base };
16659 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16660 DAG.getVTList(MVT::v2f64, MVT::Other),
16661 LoadOps, MVT::v2f64, MMO);
16662
16663 DCI.AddToWorklist(Load.getNode());
16664 Chain = Load.getValue(1);
16665 SDValue Swap = DAG.getNode(
16666 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16667 DCI.AddToWorklist(Swap.getNode());
16668
16669 // Add a bitcast if the resulting load type doesn't match v2f64.
16670 if (VecTy != MVT::v2f64) {
16671 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16672 DCI.AddToWorklist(N.getNode());
16673 // Package {bitcast value, swap's chain} to match Load's shape.
16674 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16675 N, Swap.getValue(1));
16676 }
16677
16678 return Swap;
16679}
16680
16681// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16682// builtins) into stores with swaps.
16684 DAGCombinerInfo &DCI) const {
16685 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16686 // store combines.
16687 if (DCI.isBeforeLegalizeOps())
16688 return SDValue();
16689
16690 SelectionDAG &DAG = DCI.DAG;
16691 SDLoc dl(N);
16692 SDValue Chain;
16693 SDValue Base;
16694 unsigned SrcOpnd;
16695 MachineMemOperand *MMO;
16696
16697 switch (N->getOpcode()) {
16698 default:
16699 llvm_unreachable("Unexpected opcode for little endian VSX store");
16700 case ISD::STORE: {
16702 Chain = ST->getChain();
16703 Base = ST->getBasePtr();
16704 MMO = ST->getMemOperand();
16705 SrcOpnd = 1;
16706 // If the MMO suggests this isn't a store of a full vector, leave
16707 // things alone. For a built-in, we have to make the change for
16708 // correctness, so if there is a size problem that will be a bug.
16709 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16710 return SDValue();
16711 break;
16712 }
16713 case ISD::INTRINSIC_VOID: {
16715 Chain = Intrin->getChain();
16716 // Intrin->getBasePtr() oddly does not get what we want.
16717 Base = Intrin->getOperand(3);
16718 MMO = Intrin->getMemOperand();
16719 SrcOpnd = 2;
16720 break;
16721 }
16722 }
16723
16724 SDValue Src = N->getOperand(SrcOpnd);
16725 MVT VecTy = Src.getValueType().getSimpleVT();
16726
16727 // All stores are done as v2f64 and possible bit cast.
16728 if (VecTy != MVT::v2f64) {
16729 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16730 DCI.AddToWorklist(Src.getNode());
16731 }
16732
16733 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16734 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16735 DCI.AddToWorklist(Swap.getNode());
16736 Chain = Swap.getValue(1);
16737 SDValue StoreOps[] = { Chain, Swap, Base };
16738 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16739 DAG.getVTList(MVT::Other),
16740 StoreOps, VecTy, MMO);
16741 DCI.AddToWorklist(Store.getNode());
16742 return Store;
16743}
16744
16745// Handle DAG combine for STORE (FP_TO_INT F).
16746SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16747 DAGCombinerInfo &DCI) const {
16748 SelectionDAG &DAG = DCI.DAG;
16749 SDLoc dl(N);
16750 unsigned Opcode = N->getOperand(1).getOpcode();
16751 (void)Opcode;
16752 bool Strict = N->getOperand(1)->isStrictFPOpcode();
16753
16754 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16755 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16756 && "Not a FP_TO_INT Instruction!");
16757
16758 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
16759 EVT Op1VT = N->getOperand(1).getValueType();
16760 EVT ResVT = Val.getValueType();
16761
16762 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
16763 return SDValue();
16764
16765 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
16766 bool ValidTypeForStoreFltAsInt =
16767 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16768 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16769
16770 // TODO: Lower conversion from f128 on all VSX targets
16771 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16772 return SDValue();
16773
16774 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16775 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16776 return SDValue();
16777
16778 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
16779
16780 // Set number of bytes being converted.
16781 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
16782 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
16783 DAG.getIntPtrConstant(ByteSize, dl, false),
16784 DAG.getValueType(Op1VT)};
16785
16786 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
16787 DAG.getVTList(MVT::Other), Ops,
16788 cast<StoreSDNode>(N)->getMemoryVT(),
16789 cast<StoreSDNode>(N)->getMemOperand());
16790
16791 return Val;
16792}
16793
16794static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16795 // Check that the source of the element keeps flipping
16796 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
16797 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16798 for (int i = 1, e = Mask.size(); i < e; i++) {
16799 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16800 return false;
16801 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16802 return false;
16803 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16804 }
16805 return true;
16806}
16807
16808static bool isSplatBV(SDValue Op) {
16809 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16810 return false;
16811 SDValue FirstOp;
16812
16813 // Find first non-undef input.
16814 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16815 FirstOp = Op.getOperand(i);
16816 if (!FirstOp.isUndef())
16817 break;
16818 }
16819
16820 // All inputs are undef or the same as the first non-undef input.
16821 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16822 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16823 return false;
16824 return true;
16825}
16826
16828 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16829 return Op;
16830 if (Op.getOpcode() != ISD::BITCAST)
16831 return SDValue();
16832 Op = Op.getOperand(0);
16833 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16834 return Op;
16835 return SDValue();
16836}
16837
16838// Fix up the shuffle mask to account for the fact that the result of
16839// scalar_to_vector is not in lane zero. This just takes all values in
16840// the ranges specified by the min/max indices and adds the number of
16841// elements required to ensure each element comes from the respective
16842// position in the valid lane.
16843// On little endian, that's just the corresponding element in the other
16844// half of the vector. On big endian, it is in the same half but right
16845// justified rather than left justified in that half.
16847 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16848 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16849 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16850 int LHSEltFixup =
16851 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16852 int RHSEltFixup =
16853 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16854 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16855 int Idx = ShuffV[I];
16856 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16857 ShuffV[I] += LHSEltFixup;
16858 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16859 ShuffV[I] += RHSEltFixup;
16860 }
16861}
16862
16863// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16864// the original is:
16865// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16866// In such a case, just change the shuffle mask to extract the element
16867// from the permuted index.
16869 const PPCSubtarget &Subtarget) {
16870 SDLoc dl(OrigSToV);
16871 EVT VT = OrigSToV.getValueType();
16872 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16873 "Expecting a SCALAR_TO_VECTOR here");
16874 SDValue Input = OrigSToV.getOperand(0);
16875
16876 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16877 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
16878 SDValue OrigVector = Input.getOperand(0);
16879
16880 // Can't handle non-const element indices or different vector types
16881 // for the input to the extract and the output of the scalar_to_vector.
16882 if (Idx && VT == OrigVector.getValueType()) {
16883 unsigned NumElts = VT.getVectorNumElements();
16884 assert(
16885 NumElts > 1 &&
16886 "Cannot produce a permuted scalar_to_vector for one element vector");
16887 SmallVector<int, 16> NewMask(NumElts, -1);
16888 unsigned ResultInElt = NumElts / 2;
16889 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
16890 NewMask[ResultInElt] = Idx->getZExtValue();
16891 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
16892 }
16893 }
16894 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
16895 OrigSToV.getOperand(0));
16896}
16897
16899 int HalfVec, int LHSLastElementDefined,
16900 int RHSLastElementDefined) {
16901 for (int Index : ShuffV) {
16902 if (Index < 0) // Skip explicitly undefined mask indices.
16903 continue;
16904 // Handle first input vector of the vector_shuffle.
16905 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16906 (Index > LHSLastElementDefined))
16907 return false;
16908 // Handle second input vector of the vector_shuffle.
16909 if ((RHSLastElementDefined >= 0) &&
16910 (Index > HalfVec + RHSLastElementDefined))
16911 return false;
16912 }
16913 return true;
16914}
16915
16917 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16918 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16919 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16920 EVT VecShuffOperandType = VecShuffOperand.getValueType();
16921 // Set up the values for the shuffle vector fixup.
16922 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16923 // The last element depends on if the input comes from the LHS or RHS.
16924 //
16925 // For example:
16926 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16927 //
16928 // For the LHS: The last element that comes from the LHS is actually 0, not 3
16929 // because elements 1 and higher of a scalar_to_vector are undefined.
16930 // For the RHS: The last element that comes from the RHS is actually 5, not 7
16931 // because elements 1 and higher of a scalar_to_vector are undefined.
16932 // It is also not 4 because the original scalar_to_vector is wider and
16933 // actually contains two i32 elements.
16934 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16935 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
16936 : FirstElt;
16937 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
16938 if (SToVPermuted.getValueType() != VecShuffOperandType)
16939 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
16940 return SToVPermuted;
16941}
16942
16943// On little endian subtargets, combine shuffles such as:
16944// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16945// into:
16946// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16947// because the latter can be matched to a single instruction merge.
16948// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16949// to put the value into element zero. Adjust the shuffle mask so that the
16950// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16951// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16952// nodes with elements smaller than doubleword because all the ways
16953// of getting scalar data into a vector register put the value in the
16954// rightmost element of the left half of the vector.
16955SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16956 SelectionDAG &DAG) const {
16957 SDValue LHS = SVN->getOperand(0);
16958 SDValue RHS = SVN->getOperand(1);
16959 auto Mask = SVN->getMask();
16960 int NumElts = LHS.getValueType().getVectorNumElements();
16961 SDValue Res(SVN, 0);
16962 SDLoc dl(SVN);
16963 bool IsLittleEndian = Subtarget.isLittleEndian();
16964
16965 // On big endian targets this is only useful for subtargets with direct moves.
16966 // On little endian targets it would be useful for all subtargets with VSX.
16967 // However adding special handling for LE subtargets without direct moves
16968 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
16969 // which includes direct moves.
16970 if (!Subtarget.hasDirectMove())
16971 return Res;
16972
16973 // If this is not a shuffle of a shuffle and the first element comes from
16974 // the second vector, canonicalize to the commuted form. This will make it
16975 // more likely to match one of the single instruction patterns.
16976 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
16977 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
16978 std::swap(LHS, RHS);
16979 Res = DAG.getCommutedVectorShuffle(*SVN);
16980
16981 if (!isa<ShuffleVectorSDNode>(Res))
16982 return Res;
16983
16984 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16985 }
16986
16987 // Adjust the shuffle mask if either input vector comes from a
16988 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
16989 // form (to prevent the need for a swap).
16990 SmallVector<int, 16> ShuffV(Mask);
16991 SDValue SToVLHS = isScalarToVec(LHS);
16992 SDValue SToVRHS = isScalarToVec(RHS);
16993 if (SToVLHS || SToVRHS) {
16994 EVT VT = SVN->getValueType(0);
16995 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
16996 int ShuffleNumElts = ShuffV.size();
16997 int HalfVec = ShuffleNumElts / 2;
16998 // The width of the "valid lane" (i.e. the lane that contains the value that
16999 // is vectorized) needs to be expressed in terms of the number of elements
17000 // of the shuffle. It is thereby the ratio of the values before and after
17001 // any bitcast, which will be set later on if the LHS or RHS are
17002 // SCALAR_TO_VECTOR nodes.
17003 unsigned LHSNumValidElts = HalfVec;
17004 unsigned RHSNumValidElts = HalfVec;
17005
17006 // Initially assume that neither input is permuted. These will be adjusted
17007 // accordingly if either input is. Note, that -1 means that all elements
17008 // are undefined.
17009 int LHSFirstElt = 0;
17010 int RHSFirstElt = ShuffleNumElts;
17011 int LHSLastElt = -1;
17012 int RHSLastElt = -1;
17013
17014 // Get the permuted scalar to vector nodes for the source(s) that come from
17015 // ISD::SCALAR_TO_VECTOR.
17016 // On big endian systems, this only makes sense for element sizes smaller
17017 // than 64 bits since for 64-bit elements, all instructions already put
17018 // the value into element zero. Since scalar size of LHS and RHS may differ
17019 // after isScalarToVec, this should be checked using their own sizes.
17020 int LHSScalarSize = 0;
17021 int RHSScalarSize = 0;
17022 if (SToVLHS) {
17023 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
17024 if (!IsLittleEndian && LHSScalarSize >= 64)
17025 return Res;
17026 }
17027 if (SToVRHS) {
17028 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
17029 if (!IsLittleEndian && RHSScalarSize >= 64)
17030 return Res;
17031 }
17032 if (LHSScalarSize != 0)
17034 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
17035 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
17036 if (RHSScalarSize != 0)
17038 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
17039 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
17040
17041 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
17042 return Res;
17043
17044 // Fix up the shuffle mask to reflect where the desired element actually is.
17045 // The minimum and maximum indices that correspond to element zero for both
17046 // the LHS and RHS are computed and will control which shuffle mask entries
17047 // are to be changed. For example, if the RHS is permuted, any shuffle mask
17048 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
17050 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
17051 LHSNumValidElts, RHSNumValidElts, Subtarget);
17052 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17053
17054 // We may have simplified away the shuffle. We won't be able to do anything
17055 // further with it here.
17056 if (!isa<ShuffleVectorSDNode>(Res))
17057 return Res;
17058 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
17059 }
17060
17061 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
17062 // The common case after we commuted the shuffle is that the RHS is a splat
17063 // and we have elements coming in from the splat at indices that are not
17064 // conducive to using a merge.
17065 // Example:
17066 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
17067 if (!isSplatBV(TheSplat))
17068 return Res;
17069
17070 // We are looking for a mask such that all even elements are from
17071 // one vector and all odd elements from the other.
17072 if (!isAlternatingShuffMask(Mask, NumElts))
17073 return Res;
17074
17075 // Adjust the mask so we are pulling in the same index from the splat
17076 // as the index from the interesting vector in consecutive elements.
17077 if (IsLittleEndian) {
17078 // Example (even elements from first vector):
17079 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
17080 if (Mask[0] < NumElts)
17081 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17082 if (ShuffV[i] < 0)
17083 continue;
17084 // If element from non-splat is undef, pick first element from splat.
17085 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
17086 }
17087 // Example (odd elements from first vector):
17088 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
17089 else
17090 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17091 if (ShuffV[i] < 0)
17092 continue;
17093 // If element from non-splat is undef, pick first element from splat.
17094 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
17095 }
17096 } else {
17097 // Example (even elements from first vector):
17098 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
17099 if (Mask[0] < NumElts)
17100 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17101 if (ShuffV[i] < 0)
17102 continue;
17103 // If element from non-splat is undef, pick first element from splat.
17104 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
17105 }
17106 // Example (odd elements from first vector):
17107 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
17108 else
17109 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17110 if (ShuffV[i] < 0)
17111 continue;
17112 // If element from non-splat is undef, pick first element from splat.
17113 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
17114 }
17115 }
17116
17117 // If the RHS has undefs, we need to remove them since we may have created
17118 // a shuffle that adds those instead of the splat value.
17119 SDValue SplatVal =
17120 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
17121 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
17122
17123 if (IsLittleEndian)
17124 RHS = TheSplat;
17125 else
17126 LHS = TheSplat;
17127 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17128}
17129
17130SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
17131 LSBaseSDNode *LSBase,
17132 DAGCombinerInfo &DCI) const {
17133 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
17134 "Not a reverse memop pattern!");
17135
17136 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
17137 auto Mask = SVN->getMask();
17138 int i = 0;
17139 auto I = Mask.rbegin();
17140 auto E = Mask.rend();
17141
17142 for (; I != E; ++I) {
17143 if (*I != i)
17144 return false;
17145 i++;
17146 }
17147 return true;
17148 };
17149
17150 SelectionDAG &DAG = DCI.DAG;
17151 EVT VT = SVN->getValueType(0);
17152
17153 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
17154 return SDValue();
17155
17156 // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
17157 // See comment in PPCVSXSwapRemoval.cpp.
17158 // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
17159 if (!Subtarget.hasP9Vector())
17160 return SDValue();
17161
17162 if(!IsElementReverse(SVN))
17163 return SDValue();
17164
17165 if (LSBase->getOpcode() == ISD::LOAD) {
17166 // If the load return value 0 has more than one user except the
17167 // shufflevector instruction, it is not profitable to replace the
17168 // shufflevector with a reverse load.
17169 for (SDUse &Use : LSBase->uses())
17170 if (Use.getResNo() == 0 &&
17171 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
17172 return SDValue();
17173
17174 SDLoc dl(LSBase);
17175 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
17176 return DAG.getMemIntrinsicNode(
17177 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
17178 LSBase->getMemoryVT(), LSBase->getMemOperand());
17179 }
17180
17181 if (LSBase->getOpcode() == ISD::STORE) {
17182 // If there are other uses of the shuffle, the swap cannot be avoided.
17183 // Forcing the use of an X-Form (since swapped stores only have
17184 // X-Forms) without removing the swap is unprofitable.
17185 if (!SVN->hasOneUse())
17186 return SDValue();
17187
17188 SDLoc dl(LSBase);
17189 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
17190 LSBase->getBasePtr()};
17191 return DAG.getMemIntrinsicNode(
17192 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
17193 LSBase->getMemoryVT(), LSBase->getMemOperand());
17194 }
17195
17196 llvm_unreachable("Expected a load or store node here");
17197}
17198
17199static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
17200 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
17201 if (IntrinsicID == Intrinsic::ppc_stdcx)
17202 StoreWidth = 8;
17203 else if (IntrinsicID == Intrinsic::ppc_stwcx)
17204 StoreWidth = 4;
17205 else if (IntrinsicID == Intrinsic::ppc_sthcx)
17206 StoreWidth = 2;
17207 else if (IntrinsicID == Intrinsic::ppc_stbcx)
17208 StoreWidth = 1;
17209 else
17210 return false;
17211 return true;
17212}
17213
17216 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
17217 // (ADDC (ADDE 0, 0, C), -1) -> C
17218 SDValue LHS = N->getOperand(0);
17219 SDValue RHS = N->getOperand(1);
17220 if (LHS->getOpcode() == PPCISD::ADDE &&
17221 isNullConstant(LHS->getOperand(0)) &&
17222 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
17223 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
17224 }
17225 }
17226 return SDValue();
17227}
17228
17229/// Optimize the bitfloor(X) pattern for PowerPC.
17230/// Transforms: select_cc X, 0, 0, (srl MinSignedValue, (ctlz X)), seteq
17231/// Into: srl MinSignedValue, (ctlz X)
17232///
17233/// This is safe on PowerPC because the srw instruction returns 0 when the
17234/// shift amount is == bitwidth, which matches the behavior we need for X=0.
17236 if (N->getOpcode() != ISD::SELECT_CC)
17237 return SDValue();
17238
17239 // SELECT_CC operands: LHS, RHS, TrueVal, FalseVal, CC
17240 SDValue CmpLHS = N->getOperand(0);
17241 SDValue CmpRHS = N->getOperand(1);
17242 SDValue TrueVal = N->getOperand(2);
17243 SDValue FalseVal = N->getOperand(3);
17244 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
17245
17246 // Check if condition is (X == 0)
17247 if (CC != ISD::SETEQ || !isNullConstant(CmpRHS))
17248 return SDValue();
17249
17250 // Check if TrueVal is constant 0
17251 if (!isNullConstant(TrueVal))
17252 return SDValue();
17253
17254 // This combine is replacing a select_cc with a PPC srl, not an srl with a
17255 // PPC srl. If the original srl had multiple uses it would just remain in the
17256 // code. This is at most a performance consideration.
17257 if (FalseVal.getOpcode() != ISD::SRL || !FalseVal.hasOneUse())
17258 return SDValue();
17259
17260 SDValue ShiftVal = FalseVal.getOperand(0);
17261 SDValue ShiftAmt = FalseVal.getOperand(1);
17262
17263 // Check if ShiftVal is MinSignedValue
17264 auto *ShiftConst = dyn_cast<ConstantSDNode>(ShiftVal);
17265 if (!ShiftConst || !ShiftConst->getAPIntValue().isMinSignedValue())
17266 return SDValue();
17267
17268 SDValue CtlzArg;
17269 // Check if ShiftAmt is (ctlz CmpLHS) or (truncate (ctlz ...))
17270 if (ShiftAmt.getOpcode() != ISD::CTLZ) {
17271 // Look through truncate if present (for i64 ctlz truncated to i32 shift
17272 // amount)
17273 if (ShiftAmt.getOpcode() != ISD::TRUNCATE)
17274 return SDValue();
17275
17276 // Verify the truncate target type is appropriate for shift amount (i32, not
17277 // i1 or other)
17278 if (ShiftAmt.getValueType() != MVT::i32)
17279 return SDValue();
17280
17281 SDValue CtlzNode = ShiftAmt.getOperand(0);
17282
17283 if (CtlzNode.getOpcode() != ISD::CTLZ)
17284 return SDValue();
17285
17286 CtlzArg = CtlzNode.getOperand(0);
17287 } else {
17288 CtlzArg = ShiftAmt.getOperand(0);
17289 }
17290
17291 // Check if ctlz operates on the same value as the comparison
17292 if (CtlzArg != CmpLHS)
17293 return SDValue();
17294
17295 // Using PPCISD::SRL to ensure well-defined behavior.
17296 // On PowerPC, PPCISD::SRL guarantees that shift by bitwidth returns 0,
17297 // which is exactly what we need for the bitfloor(0) case.
17298 SDLoc DL(N);
17299 SDValue PPCSrl =
17300 DAG.getNode(PPCISD::SRL, DL, FalseVal.getValueType(), ShiftVal, ShiftAmt);
17301 return PPCSrl;
17302}
17303
// Optimize zero-extension of setcc when the compared value is known to be 0
// or 1.
//
// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
// -> zext(xor(Value, 1)) for seteq
// -> zext(Value) for setne
//
// This optimization avoids the i32 -> i1 -> i32/i64 conversion sequence
// by keeping the value in its original i32 type throughout.
//
// Example:
// Before: zext(setcc(test_data_class(...), 0, seteq))
// // test_data_class returns 0 or 1 in i32
// // setcc converts i32 -> i1
// // zext converts i1 -> i64
// After: zext(xor(test_data_class(...), 1))
// // Stays in i32, then extends to i64
//
// This is beneficial because:
// 1. Eliminates the setcc instruction
// 2. Avoids i32 -> i1 truncation
// 3. Keeps computation in native integer width

// NOTE(review): the function header line was lost during extraction here.
// From the body, it is a file-local combine helper that receives the
// ZERO_EXTEND node N and a SelectionDAG DAG — restore the exact signature
// from upstream llvm-project before building.
  // Check if this is a zero_extend
  if (N->getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  SDValue Src = N->getOperand(0);

  // Check if the source is a setcc
  if (Src.getOpcode() != ISD::SETCC)
    return SDValue();

  SDValue LHS = Src.getOperand(0);
  SDValue RHS = Src.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Src.getOperand(2))->get();

  // NOTE(review): a guard line was lost during extraction here — presumably
  // "if (!isNullConstant(LHS) && !isNullConstant(RHS))", which the following
  // return and the NonNullConstant selection below imply. Confirm against
  // upstream.
    return SDValue();

  // Whichever side is not the zero constant is the value being tested.
  SDValue NonNullConstant = isNullConstant(RHS) ? LHS : RHS;

  // Only values proven to be 0 or 1 qualify; currently that is just the
  // ppc_test_data_class intrinsic.
  auto isZeroOrOne = [=](SDValue &V) {
    if (V.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        V.getConstantOperandVal(0) == Intrinsic::ppc_test_data_class)
      return true;
    return false;
  };

  if (!isZeroOrOne(NonNullConstant))
    return SDValue();

  // Check for pattern: zext(setcc (Value), 0, seteq)) or
  // zext(setcc (Value), 0, setne))
  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
    // Replace with: zext(xor(Value, 1)) for seteq
    // or: zext(Value) for setne
    // This keeps the value in i32 instead of converting to i1
    SDLoc DL(N);
    EVT VType = N->getValueType(0);
    SDValue NewNonNullConstant = DAG.getZExtOrTrunc(NonNullConstant, DL, VType);

    if (CC == ISD::SETNE)
      return NewNonNullConstant;

    SDValue One = DAG.getConstant(1, DL, VType);
    return DAG.getNode(ISD::XOR, DL, VType, NewNonNullConstant, One);
  }

  return SDValue();
}
17376
17377// Combine XOR patterns with SELECT_CC_I4/I8, for Example:
17378// 1. XOR(SELECT_CC_I4(cond, 1, 0, cc), 1) -> SELECT_CC_I4(cond, 0, 1, cc)
17379// 2. XOR(ZEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond, 0,
17380// 1, cc))
17381// 3. XOR(ANYEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond,
17382// 0, 1, cc))
17383// 4. etc
17385 assert(N->getOpcode() == ISD::XOR && "Expected XOR node");
17386
17387 EVT XorVT = N->getValueType(0);
17388 if ((XorVT != MVT::i32 && XorVT != MVT::i64))
17389 return SDValue();
17390
17391 SDValue LHS = N->getOperand(0);
17392 SDValue RHS = N->getOperand(1);
17393
17394 // Check for XOR with constant 1
17396 if (!XorConst || !XorConst->isOne()) {
17397 XorConst = dyn_cast<ConstantSDNode>(LHS);
17398 if (!XorConst || !XorConst->isOne())
17399 return SDValue();
17400 // Swap so LHS is the SELECT_CC_I4 (or extension) and RHS is the constant
17401 std::swap(LHS, RHS);
17402 }
17403
17404 // Check if LHS has only one use
17405 if (!LHS.hasOneUse())
17406 return SDValue();
17407
17408 // Handle extensions: ZEXT, ANYEXT
17409 SDValue SelectNode = LHS;
17410
17411 if (LHS.getOpcode() == ISD::ZERO_EXTEND ||
17412 LHS.getOpcode() == ISD::ANY_EXTEND) {
17413 SelectNode = LHS.getOperand(0);
17414
17415 // Check if the extension input has only one use
17416 if (!SelectNode.hasOneUse())
17417 return SDValue();
17418 }
17419
17420 // Check if SelectNode is a MachineSDNode with SELECT_CC_I4/I8 opcode
17421 if (!SelectNode.isMachineOpcode())
17422 return SDValue();
17423
17424 unsigned MachineOpc = SelectNode.getMachineOpcode();
17425
17426 // Handle both SELECT_CC_I4 and SELECT_CC_I8
17427 if (MachineOpc != PPC::SELECT_CC_I4 && MachineOpc != PPC::SELECT_CC_I8)
17428 return SDValue();
17429
17430 // SELECT_CC_I4 operands: (cond, true_val, false_val, bropc)
17431 if (SelectNode.getNumOperands() != 4)
17432 return SDValue();
17433
17434 ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(1));
17435 ConstantSDNode *ConstOp2 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(2));
17436
17437 if (!ConstOp1 || !ConstOp2)
17438 return SDValue();
17439
17440 // Only optimize if operands are {0, 1} or {1, 0}
17441 if (!((ConstOp1->isOne() && ConstOp2->isZero()) ||
17442 (ConstOp1->isZero() && ConstOp2->isOne())))
17443 return SDValue();
17444
17445 // Pattern matched! Create new SELECT_CC with swapped 0/1 operands to
17446 // eliminate XOR. If original was SELECT_CC(cond, 1, 0, pred), create
17447 // SELECT_CC(cond, 0, 1, pred). If original was SELECT_CC(cond, 0, 1, pred),
17448 // create SELECT_CC(cond, 1, 0, pred).
17449 SDLoc DL(N);
17450 MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;
17451
17452 bool ConstOp1IsOne = ConstOp1->isOne();
17453 return SDValue(
17454 DAG.getMachineNode(MachineOpc, DL, XorVT,
17455 {SelectNode.getOperand(0),
17456 DAG.getConstant(ConstOp1IsOne ? 0 : 1, DL, XorVT),
17457 DAG.getConstant(ConstOp1IsOne ? 1 : 0, DL, XorVT),
17458 SelectNode.getOperand(3)}),
17459 0);
17460}
17461
17463 DAGCombinerInfo &DCI) const {
17464 SelectionDAG &DAG = DCI.DAG;
17465 SDLoc dl(N);
17466 switch (N->getOpcode()) {
17467 default: break;
17468 case ISD::ADD:
17469 return combineADD(N, DCI);
17470 case ISD::AND: {
17471 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17472 // original input as that will prevent us from selecting optimal rotates.
17473 // This only matters if the input to the extend is i32 widened to i64.
17474 SDValue Op1 = N->getOperand(0);
17475 SDValue Op2 = N->getOperand(1);
17476 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17477 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17478 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
17479 Op1.getOperand(0).getValueType() != MVT::i32)
17480 break;
17481 SDValue NarrowOp = Op1.getOperand(0);
17482 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17483 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17484 break;
17485
17486 uint64_t Imm = Op2->getAsZExtVal();
17487 // Make sure that the constant is narrow enough to fit in the narrow type.
17488 if (!isUInt<32>(Imm))
17489 break;
17490 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
17491 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
17492 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
17493 }
17494 case ISD::XOR: {
17495 // Optimize XOR(ISEL(1,0,CR), 1) -> ISEL(0,1,CR)
17496 if (SDValue V = combineXorSelectCC(N, DAG))
17497 return V;
17498 break;
17499 }
17500 case ISD::SHL:
17501 return combineSHL(N, DCI);
17502 case ISD::SRA:
17503 return combineSRA(N, DCI);
17504 case ISD::SRL:
17505 return combineSRL(N, DCI);
17506 case ISD::MUL:
17507 return combineMUL(N, DCI);
17508 case ISD::FMA:
17509 case PPCISD::FNMSUB:
17510 return combineFMALike(N, DCI);
17511 case PPCISD::SHL:
17512 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
17513 return N->getOperand(0);
17514 break;
17515 case PPCISD::SRL:
17516 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
17517 return N->getOperand(0);
17518 break;
17519 case PPCISD::SRA:
17520 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
17521 if (C->isZero() || // 0 >>s V -> 0.
17522 C->isAllOnes()) // -1 >>s V -> -1.
17523 return N->getOperand(0);
17524 }
17525 break;
17526 case ISD::ZERO_EXTEND:
17527 if (SDValue RetV = combineZextSetccWithZero(N, DCI.DAG))
17528 return RetV;
17529 [[fallthrough]];
17530 case ISD::SIGN_EXTEND:
17531 case ISD::ANY_EXTEND:
17532 return DAGCombineExtBoolTrunc(N, DCI);
17533 case ISD::TRUNCATE:
17534 return combineTRUNCATE(N, DCI);
17535 case ISD::SETCC:
17536 if (SDValue CSCC = combineSetCC(N, DCI))
17537 return CSCC;
17538 [[fallthrough]];
17539 case ISD::SELECT_CC:
17540 if (SDValue V = combineSELECT_CCBitFloor(N, DAG))
17541 return V;
17542 return DAGCombineTruncBoolExt(N, DCI);
17543 case ISD::SINT_TO_FP:
17544 case ISD::UINT_TO_FP:
17545 return combineFPToIntToFP(N, DCI);
17547 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
17548 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
17549 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
17550 }
17551 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
17552 case ISD::STORE: {
17553
17554 EVT Op1VT = N->getOperand(1).getValueType();
17555 unsigned Opcode = N->getOperand(1).getOpcode();
17556
17557 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17558 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17559 SDValue Val = combineStoreFPToInt(N, DCI);
17560 if (Val)
17561 return Val;
17562 }
17563
17564 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17565 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
17566 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
17567 if (Val)
17568 return Val;
17569 }
17570
17571 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
17572 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
17573 N->getOperand(1).getNode()->hasOneUse() &&
17574 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17575 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17576
17577 // STBRX can only handle simple types and it makes no sense to store fewer
17578 // than two bytes in byte-reversed order.
17579 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
17580 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17581 break;
17582
17583 SDValue BSwapOp = N->getOperand(1).getOperand(0);
17584 // Do an any-extend to 32-bits if this is a half-word input.
17585 if (BSwapOp.getValueType() == MVT::i16)
17586 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
17587
17588 // If the type of the BSWAP operand is wider than the stored memory width,
17589 // it needs to be shifted to the right side before STBRX.
17590 if (Op1VT.bitsGT(mVT)) {
17591 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17592 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
17593 DAG.getConstant(Shift, dl, MVT::i32));
17594 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17595 if (Op1VT == MVT::i64)
17596 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
17597 }
17598
17599 SDValue Ops[] = {
17600 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
17601 };
17602 return
17603 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
17604 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
17605 cast<StoreSDNode>(N)->getMemOperand());
17606 }
17607
17608 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17609 // So it can increase the chance of CSE constant construction.
17610 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17611 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
17612 // Need to sign-extend to 64 bits to handle negative values.
17613 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
17614 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
17615 MemVT.getSizeInBits());
17616 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
17617
17618 auto *ST = cast<StoreSDNode>(N);
17619 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
17620 ST->getBasePtr(), ST->getOffset(), MemVT,
17621 ST->getMemOperand(), ST->getAddressingMode(),
17622 /*IsTruncating=*/true);
17623 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17624 // new store which will change the constant by removing non-demanded bits.
17625 return ST->isUnindexed()
17626 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
17627 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
17628 }
17629
17630 // For little endian, VSX stores require generating xxswapd/lxvd2x.
17631 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17632 if (Op1VT.isSimple()) {
17633 MVT StoreVT = Op1VT.getSimpleVT();
17634 if (Subtarget.needsSwapsForVSXMemOps() &&
17635 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17636 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17637 return expandVSXStoreForLE(N, DCI);
17638 }
17639 break;
17640 }
17641 case ISD::LOAD: {
17643 EVT VT = LD->getValueType(0);
17644
17645 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17646 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17647 if (VT.isSimple()) {
17648 MVT LoadVT = VT.getSimpleVT();
17649 if (Subtarget.needsSwapsForVSXMemOps() &&
17650 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17651 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17652 return expandVSXLoadForLE(N, DCI);
17653 }
17654
17655 // We sometimes end up with a 64-bit integer load, from which we extract
17656 // two single-precision floating-point numbers. This happens with
17657 // std::complex<float>, and other similar structures, because of the way we
17658 // canonicalize structure copies. However, if we lack direct moves,
17659 // then the final bitcasts from the extracted integer values to the
17660 // floating-point numbers turn into store/load pairs. Even with direct moves,
17661 // just loading the two floating-point numbers is likely better.
17662 auto ReplaceTwoFloatLoad = [&]() {
17663 if (VT != MVT::i64)
17664 return false;
17665
17666 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17667 LD->isVolatile())
17668 return false;
17669
17670 // We're looking for a sequence like this:
17671 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17672 // t16: i64 = srl t13, Constant:i32<32>
17673 // t17: i32 = truncate t16
17674 // t18: f32 = bitcast t17
17675 // t19: i32 = truncate t13
17676 // t20: f32 = bitcast t19
17677
17678 if (!LD->hasNUsesOfValue(2, 0))
17679 return false;
17680
17681 auto UI = LD->user_begin();
17682 while (UI.getUse().getResNo() != 0) ++UI;
17683 SDNode *Trunc = *UI++;
17684 while (UI.getUse().getResNo() != 0) ++UI;
17685 SDNode *RightShift = *UI;
17686 if (Trunc->getOpcode() != ISD::TRUNCATE)
17687 std::swap(Trunc, RightShift);
17688
17689 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17690 Trunc->getValueType(0) != MVT::i32 ||
17691 !Trunc->hasOneUse())
17692 return false;
17693 if (RightShift->getOpcode() != ISD::SRL ||
17694 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
17695 RightShift->getConstantOperandVal(1) != 32 ||
17696 !RightShift->hasOneUse())
17697 return false;
17698
17699 SDNode *Trunc2 = *RightShift->user_begin();
17700 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17701 Trunc2->getValueType(0) != MVT::i32 ||
17702 !Trunc2->hasOneUse())
17703 return false;
17704
17705 SDNode *Bitcast = *Trunc->user_begin();
17706 SDNode *Bitcast2 = *Trunc2->user_begin();
17707
17708 if (Bitcast->getOpcode() != ISD::BITCAST ||
17709 Bitcast->getValueType(0) != MVT::f32)
17710 return false;
17711 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17712 Bitcast2->getValueType(0) != MVT::f32)
17713 return false;
17714
17715 if (Subtarget.isLittleEndian())
17716 std::swap(Bitcast, Bitcast2);
17717
17718 // Bitcast has the second float (in memory-layout order) and Bitcast2
17719 // has the first one.
17720
17721 SDValue BasePtr = LD->getBasePtr();
17722 if (LD->isIndexed()) {
17723 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17724 "Non-pre-inc AM on PPC?");
17725 BasePtr =
17726 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17727 LD->getOffset());
17728 }
17729
17730 auto MMOFlags =
17731 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17732 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17733 LD->getPointerInfo(), LD->getAlign(),
17734 MMOFlags, LD->getAAInfo());
17735 SDValue AddPtr =
17736 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17737 BasePtr, DAG.getIntPtrConstant(4, dl));
17738 SDValue FloatLoad2 = DAG.getLoad(
17739 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17740 LD->getPointerInfo().getWithOffset(4),
17741 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17742
17743 if (LD->isIndexed()) {
17744 // Note that DAGCombine should re-form any pre-increment load(s) from
17745 // what is produced here if that makes sense.
17746 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17747 }
17748
17749 DCI.CombineTo(Bitcast2, FloatLoad);
17750 DCI.CombineTo(Bitcast, FloatLoad2);
17751
17752 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17753 SDValue(FloatLoad2.getNode(), 1));
17754 return true;
17755 };
17756
17757 if (ReplaceTwoFloatLoad())
17758 return SDValue(N, 0);
17759
17760 EVT MemVT = LD->getMemoryVT();
17761 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17762 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17763 if (LD->isUnindexed() && VT.isVector() &&
17764 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17765 // P8 and later hardware should just use LOAD.
17766 !Subtarget.hasP8Vector() &&
17767 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17768 VT == MVT::v4f32))) &&
17769 LD->getAlign() < ABIAlignment) {
17770 // This is a type-legal unaligned Altivec load.
17771 SDValue Chain = LD->getChain();
17772 SDValue Ptr = LD->getBasePtr();
17773 bool isLittleEndian = Subtarget.isLittleEndian();
17774
17775 // This implements the loading of unaligned vectors as described in
17776 // the venerable Apple Velocity Engine overview. Specifically:
17777 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17778 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17779 //
17780 // The general idea is to expand a sequence of one or more unaligned
17781 // loads into an alignment-based permutation-control instruction (lvsl
17782 // or lvsr), a series of regular vector loads (which always truncate
17783 // their input address to an aligned address), and a series of
17784 // permutations. The results of these permutations are the requested
17785 // loaded values. The trick is that the last "extra" load is not taken
17786 // from the address you might suspect (sizeof(vector) bytes after the
17787 // last requested load), but rather sizeof(vector) - 1 bytes after the
17788 // last requested vector. The point of this is to avoid a page fault if
17789 // the base address happened to be aligned. This works because if the
17790 // base address is aligned, then adding less than a full vector length
17791 // will cause the last vector in the sequence to be (re)loaded.
17792 // Otherwise, the next vector will be fetched as you might suspect was
17793 // necessary.
17794
17795 // We might be able to reuse the permutation generation from
17796 // a different base address offset from this one by an aligned amount.
17797 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17798 // optimization later.
17799 Intrinsic::ID Intr, IntrLD, IntrPerm;
17800 MVT PermCntlTy, PermTy, LDTy;
17801 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17802 : Intrinsic::ppc_altivec_lvsl;
17803 IntrLD = Intrinsic::ppc_altivec_lvx;
17804 IntrPerm = Intrinsic::ppc_altivec_vperm;
17805 PermCntlTy = MVT::v16i8;
17806 PermTy = MVT::v4i32;
17807 LDTy = MVT::v4i32;
17808
17809 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
17810
17811 // Create the new MMO for the new base load. It is like the original MMO,
17812 // but represents an area in memory almost twice the vector size centered
17813 // on the original address. If the address is unaligned, we might start
17814 // reading up to (sizeof(vector)-1) bytes below the address of the
17815 // original unaligned load.
17817 MachineMemOperand *BaseMMO =
17818 MF.getMachineMemOperand(LD->getMemOperand(),
17819 -(int64_t)MemVT.getStoreSize()+1,
17820 2*MemVT.getStoreSize()-1);
17821
17822 // Create the new base load.
17823 SDValue LDXIntID =
17824 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
17825 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17826 SDValue BaseLoad =
17828 DAG.getVTList(PermTy, MVT::Other),
17829 BaseLoadOps, LDTy, BaseMMO);
17830
17831 // Note that the value of IncOffset (which is provided to the next
17832 // load's pointer info offset value, and thus used to calculate the
17833 // alignment), and the value of IncValue (which is actually used to
17834 // increment the pointer value) are different! This is because we
17835 // require the next load to appear to be aligned, even though it
17836 // is actually offset from the base pointer by a lesser amount.
17837 int IncOffset = VT.getSizeInBits() / 8;
17838 int IncValue = IncOffset;
17839
17840 // Walk (both up and down) the chain looking for another load at the real
17841 // (aligned) offset (the alignment of the other load does not matter in
17842 // this case). If found, then do not use the offset reduction trick, as
17843 // that will prevent the loads from being later combined (as they would
17844 // otherwise be duplicates).
17845 if (!findConsecutiveLoad(LD, DAG))
17846 --IncValue;
17847
17849 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
17850 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
17851
17852 MachineMemOperand *ExtraMMO =
17853 MF.getMachineMemOperand(LD->getMemOperand(),
17854 1, 2*MemVT.getStoreSize()-1);
17855 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17856 SDValue ExtraLoad =
17858 DAG.getVTList(PermTy, MVT::Other),
17859 ExtraLoadOps, LDTy, ExtraMMO);
17860
17861 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17862 BaseLoad.getValue(1), ExtraLoad.getValue(1));
17863
17864 // Because vperm has a big-endian bias, we must reverse the order
17865 // of the input vectors and complement the permute control vector
17866 // when generating little endian code. We have already handled the
17867 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17868 // and ExtraLoad here.
17869 SDValue Perm;
17870 if (isLittleEndian)
17871 Perm = BuildIntrinsicOp(IntrPerm,
17872 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
17873 else
17874 Perm = BuildIntrinsicOp(IntrPerm,
17875 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
17876
17877 if (VT != PermTy)
17878 Perm = Subtarget.hasAltivec()
17879 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
17880 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
17881 DAG.getTargetConstant(1, dl, MVT::i64));
17882 // second argument is 1 because this rounding
17883 // is always exact.
17884
17885 // The output of the permutation is our loaded result, the TokenFactor is
17886 // our new chain.
17887 DCI.CombineTo(N, Perm, TF);
17888 return SDValue(N, 0);
17889 }
17890 }
17891 break;
17893 bool isLittleEndian = Subtarget.isLittleEndian();
17894 unsigned IID = N->getConstantOperandVal(0);
17895 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17896 : Intrinsic::ppc_altivec_lvsl);
17897 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
17898 SDValue Add = N->getOperand(1);
17899
17900 int Bits = 4 /* 16 byte alignment */;
17901
17902 if (DAG.MaskedValueIsZero(Add->getOperand(1),
17903 APInt::getAllOnes(Bits /* alignment */)
17904 .zext(Add.getScalarValueSizeInBits()))) {
17905 SDNode *BasePtr = Add->getOperand(0).getNode();
17906 for (SDNode *U : BasePtr->users()) {
17907 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17908 U->getConstantOperandVal(0) == IID) {
17909 // We've found another LVSL/LVSR, and this address is an aligned
17910 // multiple of that one. The results will be the same, so use the
17911 // one we've just found instead.
17912
17913 return SDValue(U, 0);
17914 }
17915 }
17916 }
17917
17918 if (isa<ConstantSDNode>(Add->getOperand(1))) {
17919 SDNode *BasePtr = Add->getOperand(0).getNode();
17920 for (SDNode *U : BasePtr->users()) {
17921 if (U->getOpcode() == ISD::ADD &&
17922 isa<ConstantSDNode>(U->getOperand(1)) &&
17923 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
17924 (1ULL << Bits) ==
17925 0) {
17926 SDNode *OtherAdd = U;
17927 for (SDNode *V : OtherAdd->users()) {
17928 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17929 V->getConstantOperandVal(0) == IID) {
17930 return SDValue(V, 0);
17931 }
17932 }
17933 }
17934 }
17935 }
17936 }
17937
17938 // Combine vmaxsw/h/b(a, negation of a) to abs(a)
17939 // Expose the vabsduw/h/b opportunity for downstream combines
17940 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17941 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17942 IID == Intrinsic::ppc_altivec_vmaxsh ||
17943 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17944 SDValue V1 = N->getOperand(1);
17945 SDValue V2 = N->getOperand(2);
17946 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17947 V1.getSimpleValueType() == MVT::v8i16 ||
17948 V1.getSimpleValueType() == MVT::v16i8) &&
17950 // (0-a, a)
17951 if (V1.getOpcode() == ISD::SUB &&
17953 V1.getOperand(1) == V2) {
17954 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
17955 }
17956 // (a, 0-a)
17957 if (V2.getOpcode() == ISD::SUB &&
17959 V2.getOperand(1) == V1) {
17960 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17961 }
17962 // (x-y, y-x)
17963 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17964 V1.getOperand(0) == V2.getOperand(1) &&
17965 V1.getOperand(1) == V2.getOperand(0)) {
17966 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17967 }
17968 }
17969 }
17970 }
17971
17972 break;
17974 switch (N->getConstantOperandVal(1)) {
17975 default:
17976 break;
17977 case Intrinsic::ppc_altivec_vsum4sbs:
17978 case Intrinsic::ppc_altivec_vsum4shs:
17979 case Intrinsic::ppc_altivec_vsum4ubs: {
17980 // These sum-across intrinsics only have a chain due to the side effect
17981 // that they may set the SAT bit. If we know the SAT bit will not be set
17982 // for some inputs, we can replace any uses of their chain with the
17983 // input chain.
17984 if (BuildVectorSDNode *BVN =
17985 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
17986 APInt APSplatBits, APSplatUndef;
17987 unsigned SplatBitSize;
17988 bool HasAnyUndefs;
17989 bool BVNIsConstantSplat = BVN->isConstantSplat(
17990 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
17991 !Subtarget.isLittleEndian());
17992 // If the constant splat vector is 0, the SAT bit will not be set.
17993 if (BVNIsConstantSplat && APSplatBits == 0)
17994 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
17995 }
17996 return SDValue();
17997 }
17998 case Intrinsic::ppc_vsx_lxvw4x:
17999 case Intrinsic::ppc_vsx_lxvd2x:
18000 // For little endian, VSX loads require generating lxvd2x/xxswapd.
18001 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
18002 if (Subtarget.needsSwapsForVSXMemOps())
18003 return expandVSXLoadForLE(N, DCI);
18004 break;
18005 }
18006 break;
18008 // For little endian, VSX stores require generating xxswapd/stxvd2x.
18009 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
18010 if (Subtarget.needsSwapsForVSXMemOps()) {
18011 switch (N->getConstantOperandVal(1)) {
18012 default:
18013 break;
18014 case Intrinsic::ppc_vsx_stxvw4x:
18015 case Intrinsic::ppc_vsx_stxvd2x:
18016 return expandVSXStoreForLE(N, DCI);
18017 }
18018 }
18019 break;
18020 case ISD::BSWAP: {
18021 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
18022 // For subtargets without LDBRX, we can still do better than the default
18023 // expansion even for 64-bit BSWAP (LOAD).
18024 bool Is64BitBswapOn64BitTgt =
18025 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
18026 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
18027 N->getOperand(0).hasOneUse();
18028 if (IsSingleUseNormalLd &&
18029 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
18030 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
18031 SDValue Load = N->getOperand(0);
18032 LoadSDNode *LD = cast<LoadSDNode>(Load);
18033 // Create the byte-swapping load.
18034 SDValue Ops[] = {
18035 LD->getChain(), // Chain
18036 LD->getBasePtr(), // Ptr
18037 DAG.getValueType(N->getValueType(0)) // VT
18038 };
18039 SDValue BSLoad =
18040 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
18041 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
18042 MVT::i64 : MVT::i32, MVT::Other),
18043 Ops, LD->getMemoryVT(), LD->getMemOperand());
18044
18045 // If this is an i16 load, insert the truncate.
18046 SDValue ResVal = BSLoad;
18047 if (N->getValueType(0) == MVT::i16)
18048 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
18049
18050 // First, combine the bswap away. This makes the value produced by the
18051 // load dead.
18052 DCI.CombineTo(N, ResVal);
18053
18054 // Next, combine the load away, we give it a bogus result value but a real
18055 // chain result. The result value is dead because the bswap is dead.
18056 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
18057
18058 // Return N so it doesn't get rechecked!
18059 return SDValue(N, 0);
18060 }
18061 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
18062 // before legalization so that the BUILD_PAIR is handled correctly.
18063 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
18064 !IsSingleUseNormalLd)
18065 return SDValue();
18066 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
18067
18068 // Can't split volatile or atomic loads.
18069 if (!LD->isSimple())
18070 return SDValue();
18071 SDValue BasePtr = LD->getBasePtr();
18072 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
18073 LD->getPointerInfo(), LD->getAlign());
18074 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
18075 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
18076 DAG.getIntPtrConstant(4, dl));
18078 LD->getMemOperand(), 4, 4);
18079 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
18080 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
18081 SDValue Res;
18082 if (Subtarget.isLittleEndian())
18083 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
18084 else
18085 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
18086 SDValue TF =
18087 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18088 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
18089 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
18090 return Res;
18091 }
18092 case PPCISD::VCMP:
18093 // If a VCMP_rec node already exists with exactly the same operands as this
18094 // node, use its result instead of this node (VCMP_rec computes both a CR6
18095 // and a normal output).
18096 //
18097 if (!N->getOperand(0).hasOneUse() &&
18098 !N->getOperand(1).hasOneUse() &&
18099 !N->getOperand(2).hasOneUse()) {
18100
18101 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
18102 SDNode *VCMPrecNode = nullptr;
18103
18104 SDNode *LHSN = N->getOperand(0).getNode();
18105 for (SDNode *User : LHSN->users())
18106 if (User->getOpcode() == PPCISD::VCMP_rec &&
18107 User->getOperand(1) == N->getOperand(1) &&
18108 User->getOperand(2) == N->getOperand(2) &&
18109 User->getOperand(0) == N->getOperand(0)) {
18110 VCMPrecNode = User;
18111 break;
18112 }
18113
18114 // If there is no VCMP_rec node, or if the flag value has a single use,
18115 // don't transform this.
18116 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
18117 break;
18118
18119 // Look at the (necessarily single) use of the flag value. If it has a
18120 // chain, this transformation is more complex. Note that multiple things
18121 // could use the value result, which we should ignore.
18122 SDNode *FlagUser = nullptr;
18123 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
18124 FlagUser == nullptr; ++UI) {
18125 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
18126 SDNode *User = UI->getUser();
18127 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
18128 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
18129 FlagUser = User;
18130 break;
18131 }
18132 }
18133 }
18134
18135 // If the user is a MFOCRF instruction, we know this is safe.
18136 // Otherwise we give up for right now.
18137 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
18138 return SDValue(VCMPrecNode, 0);
18139 }
18140 break;
18141 case ISD::BR_CC: {
18142 // If this is a branch on an altivec predicate comparison, lower this so
18143 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
18144 // lowering is done pre-legalize, because the legalizer lowers the predicate
18145 // compare down to code that is difficult to reassemble.
18146 // This code also handles branches that depend on the result of a store
18147 // conditional.
18148 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18149 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
18150
18151 int CompareOpc;
18152 bool isDot;
18153
18154 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
18155 break;
18156
18157 // Since we are doing this pre-legalize, the RHS can be a constant of
18158 // arbitrary bitwidth which may cause issues when trying to get the value
18159 // from the underlying APInt.
18160 auto RHSAPInt = RHS->getAsAPIntVal();
18161 if (!RHSAPInt.isIntN(64))
18162 break;
18163
18164 unsigned Val = RHSAPInt.getZExtValue();
18165 auto isImpossibleCompare = [&]() {
18166 // If this is a comparison against something other than 0/1, then we know
18167 // that the condition is never/always true.
18168 if (Val != 0 && Val != 1) {
18169 if (CC == ISD::SETEQ) // Cond never true, remove branch.
18170 return N->getOperand(0);
18171 // Always !=, turn it into an unconditional branch.
18172 return DAG.getNode(ISD::BR, dl, MVT::Other,
18173 N->getOperand(0), N->getOperand(4));
18174 }
18175 return SDValue();
18176 };
18177 // Combine branches fed by store conditional instructions (st[bhwd]cx).
18178 unsigned StoreWidth = 0;
18179 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
18180 isStoreConditional(LHS, StoreWidth)) {
18181 if (SDValue Impossible = isImpossibleCompare())
18182 return Impossible;
18183 PPC::Predicate CompOpc;
18184 // eq 0 => ne
18185 // ne 0 => eq
18186 // eq 1 => eq
18187 // ne 1 => ne
18188 if (Val == 0)
18189 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
18190 else
18191 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
18192
18193 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
18194 DAG.getConstant(StoreWidth, dl, MVT::i32)};
18195 auto *MemNode = cast<MemSDNode>(LHS);
18196 SDValue ConstSt = DAG.getMemIntrinsicNode(
18197 PPCISD::STORE_COND, dl,
18198 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
18199 MemNode->getMemoryVT(), MemNode->getMemOperand());
18200
18201 SDValue InChain;
18202 // Unchain the branch from the original store conditional.
18203 if (N->getOperand(0) == LHS.getValue(1))
18204 InChain = LHS.getOperand(0);
18205 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
18206 SmallVector<SDValue, 4> InChains;
18207 SDValue InTF = N->getOperand(0);
18208 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
18209 if (InTF.getOperand(i) != LHS.getValue(1))
18210 InChains.push_back(InTF.getOperand(i));
18211 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
18212 }
18213
18214 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
18215 DAG.getConstant(CompOpc, dl, MVT::i32),
18216 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
18217 ConstSt.getValue(2));
18218 }
18219
18220 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18221 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
18222 assert(isDot && "Can't compare against a vector result!");
18223
18224 if (SDValue Impossible = isImpossibleCompare())
18225 return Impossible;
18226
18227 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
18228 // Create the PPCISD altivec 'dot' comparison node.
18229 SDValue Ops[] = {
18230 LHS.getOperand(2), // LHS of compare
18231 LHS.getOperand(3), // RHS of compare
18232 DAG.getConstant(CompareOpc, dl, MVT::i32)
18233 };
18234 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
18235 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
18236
18237 // Unpack the result based on how the target uses it.
18238 PPC::Predicate CompOpc;
18239 switch (LHS.getConstantOperandVal(1)) {
18240 default: // Can't happen, don't crash on invalid number though.
18241 case 0: // Branch on the value of the EQ bit of CR6.
18242 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
18243 break;
18244 case 1: // Branch on the inverted value of the EQ bit of CR6.
18245 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
18246 break;
18247 case 2: // Branch on the value of the LT bit of CR6.
18248 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
18249 break;
18250 case 3: // Branch on the inverted value of the LT bit of CR6.
18251 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
18252 break;
18253 }
18254
18255 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
18256 DAG.getConstant(CompOpc, dl, MVT::i32),
18257 DAG.getRegister(PPC::CR6, MVT::i32),
18258 N->getOperand(4), CompNode.getValue(1));
18259 }
18260 break;
18261 }
18262 case ISD::BUILD_VECTOR:
18263 return DAGCombineBuildVector(N, DCI);
18264 case PPCISD::ADDC:
18265 return DAGCombineAddc(N, DCI);
18266
18267 case ISD::BITCAST:
18268 return DAGCombineBitcast(N, DCI);
18269 }
18270
18271 return SDValue();
18272}
18273
18274 SDValue
// Target hook: custom expansion of signed division by a (possibly negated)
// power of two. Emits the PPC-specific SRA_ADDZE node (arithmetic shift right
// combined with addze) instead of the generic expansion.
// NOTE(review): the scraped line naming this method (18275) is missing here;
// by position this is PPCTargetLowering::BuildSDIVPow2(SDNode *N,
// const APInt &Divisor, ...) -- confirm against upstream.
18276 SelectionDAG &DAG,
18277 SmallVectorImpl<SDNode *> &Created) const {
18278 // fold (sdiv X, pow2)
18279 EVT VT = N->getValueType(0);
// Only i32 always, and i64 on 64-bit subtargets; only (negated) powers of two.
18280 if (VT == MVT::i64 && !Subtarget.isPPC64())
18281 return SDValue();
18282 if ((VT != MVT::i32 && VT != MVT::i64) ||
18283 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
18284 return SDValue();
18285
18286 SDLoc DL(N);
18287 SDValue N0 = N->getOperand(0);
18288
// log2 of the magnitude of the divisor becomes the shift amount.
18289 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
18290 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
18291 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
18292
18293 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
// Every node created here must be reported back through Created.
18294 Created.push_back(Op.getNode());
18295
// For a negative divisor, negate the quotient: X / -2^k == -(X / 2^k).
18296 if (IsNegPow2) {
18297 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
18298 Created.push_back(Op.getNode());
18299 }
18300
18301 return Op;
18302 }
18303
18304//===----------------------------------------------------------------------===//
18305// Inline Assembly Support
18306//===----------------------------------------------------------------------===//
18307
// Target hook: compute known-zero/known-one bits for PPC-specific nodes and
// intrinsics so generic DAG combines can exploit them.
// NOTE(review): the scraped line naming this method (18308) is missing; this
// is PPCTargetLowering::computeKnownBitsForTargetNode -- confirm upstream.
18309 KnownBits &Known,
18310 const APInt &DemandedElts,
18311 const SelectionDAG &DAG,
18312 unsigned Depth) const {
18313 Known.resetAll();
18314 switch (Op.getOpcode()) {
18315 default: break;
18316 case PPCISD::LBRX: {
18317 // lhbrx is known to have the top bits cleared out.
18318 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
18319 Known.Zero = 0xFFFF0000;
18320 break;
18321 }
18322 case PPCISD::ADDE: {
// Only result 0 (the sum) has known bits; result 1 is the carry.
18323 if (Op.getResNo() == 0) {
18324 // (0|1), _ = ADDE 0, 0, CARRY
18325 SDValue LHS = Op.getOperand(0);
18326 SDValue RHS = Op.getOperand(1);
// 0 + 0 + carry can only produce 0 or 1: all bits above bit 0 are zero.
18327 if (isNullConstant(LHS) && isNullConstant(RHS))
18328 Known.Zero = ~1ULL;
18329 }
18330 break;
18331 }
// NOTE(review): line 18332 (presumably `case ISD::INTRINSIC_WO_CHAIN: {`)
// is missing from this scrape -- confirm against upstream.
18333 switch (Op.getConstantOperandVal(0)) {
18334 default: break;
// All of the AltiVec/VSX predicate-compare intrinsics return 0 or 1.
18335 case Intrinsic::ppc_altivec_vcmpbfp_p:
18336 case Intrinsic::ppc_altivec_vcmpeqfp_p:
18337 case Intrinsic::ppc_altivec_vcmpequb_p:
18338 case Intrinsic::ppc_altivec_vcmpequh_p:
18339 case Intrinsic::ppc_altivec_vcmpequw_p:
18340 case Intrinsic::ppc_altivec_vcmpequd_p:
18341 case Intrinsic::ppc_altivec_vcmpequq_p:
18342 case Intrinsic::ppc_altivec_vcmpgefp_p:
18343 case Intrinsic::ppc_altivec_vcmpgtfp_p:
18344 case Intrinsic::ppc_altivec_vcmpgtsb_p:
18345 case Intrinsic::ppc_altivec_vcmpgtsh_p:
18346 case Intrinsic::ppc_altivec_vcmpgtsw_p:
18347 case Intrinsic::ppc_altivec_vcmpgtsd_p:
18348 case Intrinsic::ppc_altivec_vcmpgtsq_p:
18349 case Intrinsic::ppc_altivec_vcmpgtub_p:
18350 case Intrinsic::ppc_altivec_vcmpgtuh_p:
18351 case Intrinsic::ppc_altivec_vcmpgtuw_p:
18352 case Intrinsic::ppc_altivec_vcmpgtud_p:
18353 case Intrinsic::ppc_altivec_vcmpgtuq_p:
18354 Known.Zero = ~1U; // All bits but the low one are known to be zero.
18355 break;
18356 }
18357 break;
18358 }
// NOTE(review): line 18359 (presumably `case ISD::INTRINSIC_W_CHAIN: {`)
// is missing from this scrape -- confirm against upstream. For chained
// intrinsics the intrinsic ID is operand 1 (operand 0 is the chain).
18360 switch (Op.getConstantOperandVal(1)) {
18361 default:
18362 break;
18363 case Intrinsic::ppc_load2r:
18364 // Top bits are cleared for load2r (which is the same as lhbrx).
18365 Known.Zero = 0xFFFF0000;
18366 break;
18367 }
18368 break;
18369 }
18370 }
18371 }
18372
// Target hook: preferred alignment for loop headers, by CPU directive.
// NOTE(review): the scraped line naming this method (18373) is missing; this
// is PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) -- confirm.
18374 switch (Subtarget.getCPUDirective()) {
18375 default: break;
// On these cores, loop alignment interacts with the instruction cache, so
// small/nested loops are aligned to a 32-byte boundary.
18376 case PPC::DIR_970:
18377 case PPC::DIR_PWR4:
18378 case PPC::DIR_PWR5:
18379 case PPC::DIR_PWR5X:
18380 case PPC::DIR_PWR6:
18381 case PPC::DIR_PWR6X:
18382 case PPC::DIR_PWR7:
18383 case PPC::DIR_PWR8:
18384 case PPC::DIR_PWR9:
18385 case PPC::DIR_PWR10:
18386 case PPC::DIR_PWR11:
18387 case PPC::DIR_PWR_FUTURE: {
18388 if (!ML)
18389 break;
18390
// NOTE(review): line 18391 is missing from this scrape; the closing brace
// at 18398 suggests a guard opened a scope here (likely a command-line
// option check) -- confirm against upstream.
18392 // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
18393 // so that we can decrease cache misses and branch-prediction misses.
18394 // Actual alignment of the loop will depend on the hotness check and other
18395 // logic in alignBlocks.
18396 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
18397 return Align(32);
18398 }
18399
18400 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
18401
18402 // For small loops (between 5 and 8 instructions), align to a 32-byte
18403 // boundary so that the entire loop fits in one instruction-cache line.
18404 uint64_t LoopSize = 0;
// Sum instruction sizes across all blocks, bailing out of the inner loop
// once the size exceeds 32 bytes (larger loops can't fit in one line).
18405 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
18406 for (const MachineInstr &J : **I) {
18407 LoopSize += TII->getInstSizeInBytes(J);
18408 if (LoopSize > 32)
18409 break;
18410 }
18411
18412 if (LoopSize > 16 && LoopSize <= 32)
18413 return Align(32);
18414
18415 break;
18416 }
18417 }
18418
// NOTE(review): line 18419 (presumably the fallback
// `return TargetLowering::getPrefLoopAlignment(ML);`) is missing -- confirm.
18420 }
18421
18422 /// getConstraintType - Given a constraint, return the type of
18423 /// constraint it is for this target.
// NOTE(review): lines 18424-18425 (the signature of
// PPCTargetLowering::getConstraintType(StringRef Constraint)) are missing
// from this scrape -- confirm against upstream.
18426 if (Constraint.size() == 1) {
18427 switch (Constraint[0]) {
18428 default: break;
// Single-letter register constraints: base GPR, GPR, FPR (32/64-bit),
// AltiVec vector, and CR field respectively.
18429 case 'b':
18430 case 'r':
18431 case 'f':
18432 case 'd':
18433 case 'v':
18434 case 'y':
18435 return C_RegisterClass;
18436 case 'Z':
18437 // FIXME: While Z does indicate a memory constraint, it specifically
18438 // indicates an r+r address (used in conjunction with the 'y' modifier
18439 // in the replacement string). Currently, we're forcing the base
18440 // register to be r0 in the asm printer (which is interpreted as zero)
18441 // and forming the complete address in the second register. This is
18442 // suboptimal.
18443 return C_Memory;
18444 }
18445 } else if (Constraint == "wc") { // individual CR bits.
18446 return C_RegisterClass;
18447 } else if (Constraint == "wa" || Constraint == "wd" ||
18448 Constraint == "wf" || Constraint == "ws" ||
18449 Constraint == "wi" || Constraint == "ww") {
18450 return C_RegisterClass; // VSX registers.
18451 }
// Anything else is handled by the generic implementation.
18452 return TargetLowering::getConstraintType(Constraint);
18453 }
18454
18455 /// Examine constraint type and operand type and determine a weight value.
18456 /// This object must already have been set up with the operand type
18457 /// and the current alternative constraint selected.
// NOTE(review): lines 18458-18459 (the signature of
// PPCTargetLowering::getSingleConstraintMatchWeight) and 18461 (presumably
// the `ConstraintWeight weight = CW_Invalid;` initialization) are missing
// from this scrape -- confirm against upstream.
18460 AsmOperandInfo &info, const char *constraint) const {
18462 Value *CallOperandVal = info.CallOperandVal;
18463 // If we don't have a value, we can't do a match,
18464 // but allow it at the lowest weight.
18465 if (!CallOperandVal)
18466 return CW_Default;
18467 Type *type = CallOperandVal->getType();
18468
18469 // Look at the constraint type.
// Multi-letter 'w*' constraints: weight as a register only when the operand
// type matches what that VSX/CR-bit constraint can actually hold.
18470 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
18471 return CW_Register; // an individual CR bit.
18472 else if ((StringRef(constraint) == "wa" ||
18473 StringRef(constraint) == "wd" ||
18474 StringRef(constraint) == "wf") &&
18475 type->isVectorTy())
18476 return CW_Register;
18477 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
18478 return CW_Register; // just hold 64-bit integers data.
18479 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
18480 return CW_Register;
18481 else if (StringRef(constraint) == "ww" && type->isFloatTy())
18482 return CW_Register;
18483
// Single-letter constraints.
18484 switch (*constraint) {
18485 default:
// NOTE(review): line 18486 (presumably the fallback
// `weight = TargetLowering::getSingleConstraintMatchWeight(...)`) is
// missing from this scrape -- confirm against upstream.
18487 break;
18488 case 'b':
18489 if (type->isIntegerTy())
18490 weight = CW_Register;
18491 break;
18492 case 'f':
18493 if (type->isFloatTy())
18494 weight = CW_Register;
18495 break;
18496 case 'd':
18497 if (type->isDoubleTy())
18498 weight = CW_Register;
18499 break;
18500 case 'v':
18501 if (type->isVectorTy())
18502 weight = CW_Register;
18503 break;
18504 case 'y':
18505 weight = CW_Register;
18506 break;
18507 case 'Z':
18508 weight = CW_Memory;
18509 break;
18510 }
18511 return weight;
18512 }
18513
18514 std::pair<unsigned, const TargetRegisterClass *>
// Target hook: map an inline-asm constraint string to a (register, register
// class) pair. Returns (0, class) for "any register of this class", or a
// concrete physical register for named-register constraints like {vs5}/{f3}.
// NOTE(review): line 18515 (the line naming this method and declaring its
// first parameter, presumably `const TargetRegisterInfo *TRI`) is missing
// from this scrape -- TRI is used below; confirm against upstream.
18516 StringRef Constraint,
18517 MVT VT) const {
18518 if (Constraint.size() == 1) {
18519 // GCC RS6000 Constraint Letters
18520 switch (Constraint[0]) {
18521 case 'b': // R1-R31
18522 if (VT == MVT::i64 && Subtarget.isPPC64())
18523 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
18524 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
18525 case 'r': // R0-R31
18526 if (VT == MVT::i64 && Subtarget.isPPC64())
18527 return std::make_pair(0U, &PPC::G8RCRegClass);
18528 return std::make_pair(0U, &PPC::GPRCRegClass);
18529 // 'd' and 'f' constraints are both defined to be "the floating point
18530 // registers", where one is for 32-bit and the other for 64-bit. We don't
18531 // really care overly much here so just give them all the same reg classes.
18532 case 'd':
18533 case 'f':
// With SPE, floats live in GPRs (f32) or SPE register pairs (f64).
18534 if (Subtarget.hasSPE()) {
18535 if (VT == MVT::f32 || VT == MVT::i32)
18536 return std::make_pair(0U, &PPC::GPRCRegClass);
18537 if (VT == MVT::f64 || VT == MVT::i64)
18538 return std::make_pair(0U, &PPC::SPERCRegClass);
18539 } else {
18540 if (VT == MVT::f32 || VT == MVT::i32)
18541 return std::make_pair(0U, &PPC::F4RCRegClass);
18542 if (VT == MVT::f64 || VT == MVT::i64)
18543 return std::make_pair(0U, &PPC::F8RCRegClass);
18544 }
18545 break;
18546 case 'v':
18547 if (Subtarget.hasAltivec() && VT.isVector())
18548 return std::make_pair(0U, &PPC::VRRCRegClass);
18549 else if (Subtarget.hasVSX())
18550 // Scalars in Altivec registers only make sense with VSX.
18551 return std::make_pair(0U, &PPC::VFRCRegClass);
18552 break;
18553 case 'y': // crrc
18554 return std::make_pair(0U, &PPC::CRRCRegClass);
18555 }
18556 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
18557 // An individual CR bit.
18558 return std::make_pair(0U, &PPC::CRBITRCRegClass);
18559 } else if ((Constraint == "wa" || Constraint == "wd" ||
18560 Constraint == "wf" || Constraint == "wi") &&
18561 Subtarget.hasVSX()) {
18562 // A VSX register for either a scalar (FP) or vector. There is no
18563 // support for single precision scalars on subtargets prior to Power8.
18564 if (VT.isVector())
18565 return std::make_pair(0U, &PPC::VSRCRegClass);
18566 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18567 return std::make_pair(0U, &PPC::VSSRCRegClass);
18568 return std::make_pair(0U, &PPC::VSFRCRegClass);
18569 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
18570 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18571 return std::make_pair(0U, &PPC::VSSRCRegClass);
18572 else
18573 return std::make_pair(0U, &PPC::VSFRCRegClass);
18574 } else if (Constraint == "lr") {
18575 if (VT == MVT::i64)
18576 return std::make_pair(0U, &PPC::LR8RCRegClass);
18577 else
18578 return std::make_pair(0U, &PPC::LRRCRegClass);
18579 }
18580
18581 // Handle special cases of physical registers that are not properly handled
18582 // by the base class.
18583 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
18584 // If we name a VSX register, we can't defer to the base class because it
18585 // will not recognize the correct register (their names will be VSL{0-31}
18586 // and V{0-31} so they won't match). So we match them here.
18587 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
18588 int VSNum = atoi(Constraint.data() + 3);
18589 assert(VSNum >= 0 && VSNum <= 63 &&
18590 "Attempted to access a vsr out of range");
// vs0-vs31 alias VSL0-VSL31; vs32-vs63 alias the AltiVec V0-V31 registers.
18591 if (VSNum < 32)
18592 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
18593 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
18594 }
18595
18596 // For float registers, we can't defer to the base class as it will match
18597 // the SPILLTOVSRRC class.
18598 if (Constraint.size() > 3 && Constraint[1] == 'f') {
18599 int RegNum = atoi(Constraint.data() + 2);
18600 if (RegNum > 31 || RegNum < 0)
18601 report_fatal_error("Invalid floating point register number");
18602 if (VT == MVT::f32 || VT == MVT::i32)
18603 return Subtarget.hasSPE()
18604 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
18605 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
18606 if (VT == MVT::f64 || VT == MVT::i64)
18607 return Subtarget.hasSPE()
18608 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
18609 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
18610 }
18611 }
18612
18613 std::pair<unsigned, const TargetRegisterClass *> R =
// NOTE(review): line 18614 (presumably the call
// `TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);`)
// is missing from this scrape -- confirm against upstream.
18615
18616 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
18617 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
18618 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
18619 // register.
18620 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
18621 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
18622 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
18623 PPC::GPRCRegClass.contains(R.first))
18624 return std::make_pair(TRI->getMatchingSuperReg(R.first,
18625 PPC::sub_32, &PPC::G8RCRegClass),
18626 &PPC::G8RCRegClass);
18627
18628 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
18629 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
18630 R.first = PPC::CR0;
18631 R.second = &PPC::CRRCRegClass;
18632 }
18633 // FIXME: This warning should ideally be emitted in the front end.
18634 const auto &TM = getTargetMachine();
18635 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
18636 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
18637 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
18638 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
18639 errs() << "warning: vector registers 20 to 32 are reserved in the "
18640 "default AIX AltiVec ABI and cannot be used\n";
18641 }
18642
18643 return R;
18644 }
18645
18646 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18647 /// vector. If it is invalid, don't add anything to Ops.
// NOTE(review): line 18648 (the line naming this method, presumably
// `void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,`) is
// missing from this scrape -- confirm against upstream.
18649 StringRef Constraint,
18650 std::vector<SDValue> &Ops,
18651 SelectionDAG &DAG) const {
18652 SDValue Result;
18653
18654 // Only support length 1 constraints.
18655 if (Constraint.size() > 1)
18656 return;
18657
18658 char Letter = Constraint[0];
18659 switch (Letter) {
18660 default: break;
// GCC PowerPC immediate-constraint letters; see the per-case comments below.
18661 case 'I':
18662 case 'J':
18663 case 'K':
18664 case 'L':
18665 case 'M':
18666 case 'N':
18667 case 'O':
18668 case 'P': {
// NOTE(review): line 18669 (presumably
// `ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);`) is missing
// from this scrape -- CST is tested just below; confirm upstream.
18670 if (!CST) return; // Must be an immediate to match.
18671 SDLoc dl(Op);
18672 int64_t Value = CST->getSExtValue();
18673 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
18674 // numbers are printed as such.
18675 switch (Letter) {
18676 default: llvm_unreachable("Unknown constraint letter!");
18677 case 'I': // "I" is a signed 16-bit constant.
18678 if (isInt<16>(Value))
18679 Result = DAG.getTargetConstant(Value, dl, TCVT);
18680 break;
18681 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
// NOTE(review): line 18682 (the 'J' range check, presumably
// `if (isShiftedUInt<16, 16>(Value))`) is missing -- confirm upstream.
18683 Result = DAG.getTargetConstant(Value, dl, TCVT);
18684 break;
18685 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
// NOTE(review): line 18686 (the 'L' range check, presumably
// `if (isShiftedInt<16, 16>(Value))`) is missing -- confirm upstream.
18687 Result = DAG.getTargetConstant(Value, dl, TCVT);
18688 break;
18689 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
18690 if (isUInt<16>(Value))
18691 Result = DAG.getTargetConstant(Value, dl, TCVT);
18692 break;
18693 case 'M': // "M" is a constant that is greater than 31.
18694 if (Value > 31)
18695 Result = DAG.getTargetConstant(Value, dl, TCVT);
18696 break;
18697 case 'N': // "N" is a positive constant that is an exact power of two.
18698 if (Value > 0 && isPowerOf2_64(Value))
18699 Result = DAG.getTargetConstant(Value, dl, TCVT);
18700 break;
18701 case 'O': // "O" is the constant zero.
18702 if (Value == 0)
18703 Result = DAG.getTargetConstant(Value, dl, TCVT);
18704 break;
18705 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
18706 if (isInt<16>(-Value))
18707 Result = DAG.getTargetConstant(Value, dl, TCVT);
18708 break;
18709 }
18710 break;
18711 }
18712 }
18713
18714 if (Result.getNode()) {
18715 Ops.push_back(Result);
18716 return;
18717 }
18718
18719 // Handle standard constraint letters.
// NOTE(review): line 18720 (presumably the fallback call
// `TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);`)
// is missing from this scrape -- confirm against upstream.
18721 }
18722
// Target hook: append extra target-specific operands when lowering an
// intrinsic call. For the PPC trap intrinsics (tw/tdw/trap/trapd), forward
// any !annotation metadata attached to the call so it survives into the DAG.
// NOTE(review): lines 18723-18724 (the signature naming this method and its
// first parameters) are missing from this scrape -- confirm upstream.
18725 SelectionDAG &DAG) const {
// Nothing to do unless the node has an intrinsic-ID operand.
18726 if (I.getNumOperands() <= 1)
18727 return;
18728 if (!isa<ConstantSDNode>(Ops[1].getNode()))
18729 return;
18730 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18731 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18732 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18733 return;
18734
18735 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18736 Ops.push_back(DAG.getMDNode(MDN));
18737 }
18738
18739 // isLegalAddressingMode - Return true if the addressing mode represented
18740 // by AM is legal for this target, for a load/store of the specified type.
// NOTE(review): line 18741 (the line naming this method, presumably
// `bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,`) is
// missing from this scrape -- confirm against upstream.
18742 const AddrMode &AM, Type *Ty,
18743 unsigned AS,
18744 Instruction *I) const {
18745 // Vector type r+i form is supported since power9 as DQ form. We don't check
18746 // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
18747 // imm form is preferred and the offset can be adjusted to use imm form later
18748 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
18749 // max offset to check legal addressing mode, we should be a little aggressive
18750 // to contain other offsets for that LSRUse.
18751 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18752 return false;
18753
18754 // PPC allows a sign-extended 16-bit immediate field.
18755 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18756 return false;
18757
18758 // No global is ever allowed as a base.
18759 if (AM.BaseGV)
18760 return false;
18761
18762 // PPC only support r+r,
18763 switch (AM.Scale) {
18764 case 0: // "r+i" or just "i", depending on HasBaseReg.
18765 break;
18766 case 1:
18767 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18768 return false;
18769 // Otherwise we have r+r or r+i.
18770 break;
18771 case 2:
18772 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18773 return false;
18774 // Allow 2*r as r+r.
18775 break;
18776 default:
18777 // No other scales are supported.
18778 return false;
18779 }
18780
18781 return true;
18782 }
18783
// Lower llvm.returnaddress: for depth 0 load the saved LR slot of the current
// frame; for depth > 0 walk up the back-chain and load LR from the caller's
// frame at the ABI return-save offset.
18784 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
18785 SelectionDAG &DAG) const {
// NOTE(review): line 18786 (presumably
// `MachineFunction &MF = DAG.getMachineFunction();`) is missing from this
// scrape -- MF is used below; confirm against upstream.
18787 MachineFrameInfo &MFI = MF.getFrameInfo();
18788 MFI.setReturnAddressIsTaken(true);
18789
18790 SDLoc dl(Op);
18791 unsigned Depth = Op.getConstantOperandVal(0);
18792
18793 // Make sure the function does not optimize away the store of the RA to
18794 // the stack.
18795 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
18796 FuncInfo->setLRStoreRequired();
18797 auto PtrVT = getPointerTy(MF.getDataLayout());
18798
18799 if (Depth > 0) {
18800 // The link register (return address) is saved in the caller's frame
18801 // not the callee's stack frame. So we must get the caller's frame
18802 // address and load the return address at the LR offset from there.
18803 SDValue FrameAddr =
18804 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
// NOTE(review): lines 18805 and 18811 (the remaining getLoad arguments,
// presumably LowerFRAMEADDR(Op, DAG) and MachinePointerInfo()) are
// missing from this scrape -- confirm against upstream.
18806 SDValue Offset =
18807 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
18808 Subtarget.getScalarIntVT());
18809 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
18810 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
18812 }
18813
18814 // Just load the return address off the stack.
18815 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
18816 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
// NOTE(review): line 18817 (the trailing MachinePointerInfo() argument) is
// missing from this scrape -- confirm against upstream.
18818 }
18819
18820SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18821 SelectionDAG &DAG) const {
18822 SDLoc dl(Op);
18823 unsigned Depth = Op.getConstantOperandVal(0);
18824
18825 MachineFunction &MF = DAG.getMachineFunction();
18826 MachineFrameInfo &MFI = MF.getFrameInfo();
18827 MFI.setFrameAddressIsTaken(true);
18828
18829 EVT PtrVT = getPointerTy(MF.getDataLayout());
18830 bool isPPC64 = PtrVT == MVT::i64;
18831
18832 // Naked functions never have a frame pointer, and so we use r1. For all
18833 // other functions, this decision must be delayed until during PEI.
18834 unsigned FrameReg;
18835 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
18836 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18837 else
18838 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18839
18840 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
18841 PtrVT);
18842 while (Depth--)
18843 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18844 FrameAddr, MachinePointerInfo());
18845 return FrameAddr;
18846}
18847
18848#define GET_REGISTER_MATCHER
18849#include "PPCGenAsmMatcher.inc"
18850
// Target hook: resolve a named-register global (e.g. register asm("r13"))
// to a physical register, rejecting sizes and registers that cannot be
// reserved on this subtarget.
// NOTE(review): line 18851 (the line naming this method and its leading
// parameters, presumably `Register PPCTargetLowering::getRegisterByName(
// const char *RegName, LLT VT,`) is missing from this scrape -- confirm.
18852 const MachineFunction &MF) const {
18853 bool IsPPC64 = Subtarget.isPPC64();
18854
// Only 32-bit scalars are valid in 32-bit mode; 64-bit scalars need PPC64.
18855 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
18856 if (!Is64Bit && VT != LLT::scalar(32))
18857 report_fatal_error("Invalid register global variable type");
18858
// NOTE(review): line 18859 (the lookup initializing `Reg` from RegName,
// presumably via MatchRegisterName from PPCGenAsmMatcher.inc included just
// above) is missing from this scrape -- confirm against upstream.
18860 if (!Reg)
18861 return Reg;
18862
18863 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
18864 // Need followup investigation as to why.
18865 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
18866 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
18867 StringRef(RegName) + "\"."));
18868
18869 // Convert GPR to GP8R register for 64bit.
18870 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
18871 Reg = Reg.id() - PPC::R0 + PPC::X0;
18872
18873 return Reg;
18874 }
18875
// Returns true when the given address is materialized indirectly through the
// GOT/TOC rather than computed directly, which depends on ABI and code model.
// NOTE(review): line 18876 (the line naming this method, presumably
// `bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {`) is
// missing from this scrape -- confirm against upstream.
18877 // 32-bit SVR4 ABI access everything as got-indirect.
18878 if (Subtarget.is32BitELFABI())
18879 return true;
18880
18881 // AIX accesses everything indirectly through the TOC, which is similar to
18882 // the GOT.
18883 if (Subtarget.isAIXABI())
18884 return true;
18885
// NOTE(review): line 18886 (presumably
// `CodeModel::Model CModel = getTargetMachine().getCodeModel();`) is
// missing from this scrape -- CModel is tested below; confirm upstream.
18887 // If it is small or large code model, module locals are accessed
18888 // indirectly by loading their address from .toc/.got.
18889 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18890 return true;
18891
18892 // JumpTable and BlockAddress are accessed as got-indirect.
// NOTE(review): line 18893 (the isa<> check guarding this return) is
// missing from this scrape -- confirm against upstream.
18894 return true;
18895
// NOTE(review): line 18896 (presumably the GlobalAddressSDNode dyn_cast
// binding `G`) is missing from this scrape -- confirm against upstream.
18897 return Subtarget.isGVIndirectSymbol(G->getGlobal());
18898
18899 return false;
18900 }
18901
18902 bool
// NOTE(review): line 18903 (the line naming this method, presumably
// `PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)`)
// is missing from this scrape -- confirm against upstream.
18904 // The PowerPC target isn't yet aware of offsets.
18905 return false;
18906 }
18907
18910 MachineFunction &MF, unsigned Intrinsic) const {
18911 IntrinsicInfo Info;
18912 switch (Intrinsic) {
18913 case Intrinsic::ppc_atomicrmw_xchg_i128:
18914 case Intrinsic::ppc_atomicrmw_add_i128:
18915 case Intrinsic::ppc_atomicrmw_sub_i128:
18916 case Intrinsic::ppc_atomicrmw_nand_i128:
18917 case Intrinsic::ppc_atomicrmw_and_i128:
18918 case Intrinsic::ppc_atomicrmw_or_i128:
18919 case Intrinsic::ppc_atomicrmw_xor_i128:
18920 case Intrinsic::ppc_cmpxchg_i128:
18921 Info.opc = ISD::INTRINSIC_W_CHAIN;
18922 Info.memVT = MVT::i128;
18923 Info.ptrVal = I.getArgOperand(0);
18924 Info.offset = 0;
18925 Info.align = Align(16);
18928 Infos.push_back(Info);
18929 return;
18930 case Intrinsic::ppc_atomic_load_i128:
18931 Info.opc = ISD::INTRINSIC_W_CHAIN;
18932 Info.memVT = MVT::i128;
18933 Info.ptrVal = I.getArgOperand(0);
18934 Info.offset = 0;
18935 Info.align = Align(16);
18937 Infos.push_back(Info);
18938 return;
18939 case Intrinsic::ppc_atomic_store_i128:
18940 Info.opc = ISD::INTRINSIC_VOID;
18941 Info.memVT = MVT::i128;
18942 Info.ptrVal = I.getArgOperand(2);
18943 Info.offset = 0;
18944 Info.align = Align(16);
18946 Infos.push_back(Info);
18947 return;
18948 case Intrinsic::ppc_altivec_lvx:
18949 case Intrinsic::ppc_altivec_lvxl:
18950 case Intrinsic::ppc_altivec_lvebx:
18951 case Intrinsic::ppc_altivec_lvehx:
18952 case Intrinsic::ppc_altivec_lvewx:
18953 case Intrinsic::ppc_vsx_lxvd2x:
18954 case Intrinsic::ppc_vsx_lxvw4x:
18955 case Intrinsic::ppc_vsx_lxvd2x_be:
18956 case Intrinsic::ppc_vsx_lxvw4x_be:
18957 case Intrinsic::ppc_vsx_lxvl:
18958 case Intrinsic::ppc_vsx_lxvll: {
18959 EVT VT;
18960 switch (Intrinsic) {
18961 case Intrinsic::ppc_altivec_lvebx:
18962 VT = MVT::i8;
18963 break;
18964 case Intrinsic::ppc_altivec_lvehx:
18965 VT = MVT::i16;
18966 break;
18967 case Intrinsic::ppc_altivec_lvewx:
18968 VT = MVT::i32;
18969 break;
18970 case Intrinsic::ppc_vsx_lxvd2x:
18971 case Intrinsic::ppc_vsx_lxvd2x_be:
18972 VT = MVT::v2f64;
18973 break;
18974 default:
18975 VT = MVT::v4i32;
18976 break;
18977 }
18978
18979 Info.opc = ISD::INTRINSIC_W_CHAIN;
18980 Info.memVT = VT;
18981 Info.ptrVal = I.getArgOperand(0);
18982 Info.offset = -VT.getStoreSize()+1;
18983 Info.size = 2*VT.getStoreSize()-1;
18984 Info.align = Align(1);
18985 Info.flags = MachineMemOperand::MOLoad;
18986 Infos.push_back(Info);
18987 return;
18988 }
18989 case Intrinsic::ppc_altivec_stvx:
18990 case Intrinsic::ppc_altivec_stvxl:
18991 case Intrinsic::ppc_altivec_stvebx:
18992 case Intrinsic::ppc_altivec_stvehx:
18993 case Intrinsic::ppc_altivec_stvewx:
18994 case Intrinsic::ppc_vsx_stxvd2x:
18995 case Intrinsic::ppc_vsx_stxvw4x:
18996 case Intrinsic::ppc_vsx_stxvd2x_be:
18997 case Intrinsic::ppc_vsx_stxvw4x_be:
18998 case Intrinsic::ppc_vsx_stxvl:
18999 case Intrinsic::ppc_vsx_stxvll: {
19000 EVT VT;
19001 switch (Intrinsic) {
19002 case Intrinsic::ppc_altivec_stvebx:
19003 VT = MVT::i8;
19004 break;
19005 case Intrinsic::ppc_altivec_stvehx:
19006 VT = MVT::i16;
19007 break;
19008 case Intrinsic::ppc_altivec_stvewx:
19009 VT = MVT::i32;
19010 break;
19011 case Intrinsic::ppc_vsx_stxvd2x:
19012 case Intrinsic::ppc_vsx_stxvd2x_be:
19013 VT = MVT::v2f64;
19014 break;
19015 default:
19016 VT = MVT::v4i32;
19017 break;
19018 }
19019
19020 Info.opc = ISD::INTRINSIC_VOID;
19021 Info.memVT = VT;
19022 Info.ptrVal = I.getArgOperand(1);
19023 Info.offset = -VT.getStoreSize()+1;
19024 Info.size = 2*VT.getStoreSize()-1;
19025 Info.align = Align(1);
19026 Info.flags = MachineMemOperand::MOStore;
19027 Infos.push_back(Info);
19028 return;
19029 }
19030 case Intrinsic::ppc_stdcx:
19031 case Intrinsic::ppc_stwcx:
19032 case Intrinsic::ppc_sthcx:
19033 case Intrinsic::ppc_stbcx: {
19034 EVT VT;
19035 auto Alignment = Align(8);
19036 switch (Intrinsic) {
19037 case Intrinsic::ppc_stdcx:
19038 VT = MVT::i64;
19039 break;
19040 case Intrinsic::ppc_stwcx:
19041 VT = MVT::i32;
19042 Alignment = Align(4);
19043 break;
19044 case Intrinsic::ppc_sthcx:
19045 VT = MVT::i16;
19046 Alignment = Align(2);
19047 break;
19048 case Intrinsic::ppc_stbcx:
19049 VT = MVT::i8;
19050 Alignment = Align(1);
19051 break;
19052 }
19053 Info.opc = ISD::INTRINSIC_W_CHAIN;
19054 Info.memVT = VT;
19055 Info.ptrVal = I.getArgOperand(0);
19056 Info.offset = 0;
19057 Info.align = Alignment;
19059 Infos.push_back(Info);
19060 return;
19061 }
19062 default:
19063 break;
19064 }
19065}
19066
19067 /// It returns EVT::Other if the type should be determined using generic
19068 /// target-independent logic.
// Target hook: pick the widest profitable type for lowering memcpy/memset.
// NOTE(review): line 19069 (the line naming this method, presumably
// `EVT PPCTargetLowering::getOptimalMemOpType(`) is missing from this
// scrape -- confirm against upstream.
19070 LLVMContext &Context, const MemOp &Op,
19071 const AttributeList &FuncAttributes) const {
19072 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
19073 // We should use Altivec/VSX loads and stores when available. For unaligned
19074 // addresses, unaligned VSX loads are only fast starting with the P8.
19075 if (Subtarget.hasAltivec() && Op.size() >= 16) {
19076 if (Op.isMemset() && Subtarget.hasVSX()) {
19077 uint64_t TailSize = Op.size() % 16;
19078 // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
19079 // element if vector element type matches tail store. For tail size
19080 // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
19081 if (TailSize > 2 && TailSize <= 4) {
19082 return MVT::v8i16;
19083 }
19084 return MVT::v4i32;
19085 }
// Unaligned vector accesses only pay off from Power8 onward.
19086 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
19087 return MVT::v4i32;
19088 }
19089 }
19090
// Scalar fallback: native GPR width.
19091 if (Subtarget.isPPC64()) {
19092 return MVT::i64;
19093 }
19094
19095 return MVT::i32;
19096 }
19097
19098 /// Returns true if it is beneficial to convert a load of a constant
19099 /// to just the constant itself.
// NOTE(review): line 19100 (the line naming this method, presumably
// `bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,`)
// is missing from this scrape -- confirm against upstream.
19101 Type *Ty) const {
19102 assert(Ty->isIntegerTy());
19103
// Any integer that fits in 64 bits can be materialized with instructions.
19104 unsigned BitSize = Ty->getPrimitiveSizeInBits();
19105 return !(BitSize == 0 || BitSize > 64);
19106 }
19107
// Truncation from i64 to i32 is free on PPC (just use the low word).
// NOTE(review): line 19108 (the line naming this method, presumably
// `bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {`) is
// missing from this scrape -- confirm against upstream.
19109 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19110 return false;
19111 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
19112 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
19113 return NumBits1 == 64 && NumBits2 == 32;
19114 }
19115
// EVT overload of the above: truncating i64 to i32 is free.
// NOTE(review): line 19116 (the line naming this method, presumably
// `bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {`) is
// missing from this scrape -- confirm against upstream.
19117 if (!VT1.isInteger() || !VT2.isInteger())
19118 return false;
19119 unsigned NumBits1 = VT1.getSizeInBits();
19120 unsigned NumBits2 = VT2.getSizeInBits();
19121 return NumBits1 == 64 && NumBits2 == 32;
19122 }
19123
// NOTE(review): line 19124 (the line naming this method, presumably
// `bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {`) is
// missing from this scrape -- confirm against upstream.
19125 // Generally speaking, zexts are not free, but they are free when they can be
19126 // folded with other operations.
19127 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
19128 EVT MemVT = LD->getMemoryVT();
// Sub-word loads (and i32 loads on PPC64) zero-extend for free, as long as
// the load is not already sign-extending.
19129 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
19130 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
19131 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
19132 LD->getExtensionType() == ISD::ZEXTLOAD))
19133 return true;
19134 }
19135
19136 // FIXME: Add other cases...
19137 // - 32-bit shifts with a zext to i64
19138 // - zext after ctlz, bswap, etc.
19139 // - zext after and by a constant mask
19140
19141 return TargetLowering::isZExtFree(Val, VT2);
19142 }
19143
19144bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
19145 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
19146 "invalid fpext types");
19147 // Extending to float128 is not free.
19148 if (DestVT == MVT::f128)
19149 return false;
19150 return true;
19151}
19152
  // Compare immediates are legal when they fit in 16 bits, either signed
  // or unsigned.
  return isInt<16>(Imm) || isUInt<16>(Imm);
}
19156
  // Add immediates are legal when they fit in 16 bits, either signed or
  // unsigned.
  return isInt<16>(Imm) || isUInt<16>(Imm);
}
19160
                                                     unsigned *Fast) const {
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  // Unaligned scalar FP access is only allowed when the subtarget permits it.
  if (VT.isFloatingPoint() && !VT.isVector() &&
      !Subtarget.allowsUnalignedFPAccess())
    return false;

  // Vector types: only the four word/doubleword VSX types tolerate
  // misaligned access; everything else must be expanded.
  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  // Misaligned ppcf128 access is not supported.
  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = 1;

  return true;
}
19198
                                               SDValue C) const {
  // Check integral scalar types.
  if (!VT.isScalarInteger())
    return false;
  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    if (!ConstNode->getAPIntValue().isSignedIntN(64))
      return false;
    // This transformation will generate >= 2 operations. But the following
    // cases will generate <= 2 instructions during ISEL. So exclude them.
    // 1. If the constant multiplier fits 16 bits, it can be handled by one
    //    HW instruction, ie. MULLI
    // 2. If the multiplier after shifted fits 16 bits, an extra shift
    //    instruction is needed than case 1, ie. MULLI and RLDICR
    int64_t Imm = ConstNode->getSExtValue();
    // Strip trailing zeros first; that factor is handled by a single shift.
    unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
    Imm >>= Shift;
    if (isInt<16>(Imm))
      return false;
    uint64_t UImm = static_cast<uint64_t>(Imm);
    // Only decompose when the remaining factor is one away from a power of
    // two (in either sign), i.e. expressible as a shift plus one add/sub.
    if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
        isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
      return true;
  }
  return false;
}
19225
19231
                                                   Type *Ty) const {
  // No profitable FMA with SPE or soft-float.
  if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
    return false;
  switch (Ty->getScalarType()->getTypeID()) {
  case Type::FloatTyID:
  case Type::DoubleTyID:
    // f32/f64 fused multiply-add is always preferable.
    return true;
  case Type::FP128TyID:
    // IEEE quad FMA requires Power9 vector support.
    return Subtarget.hasP9Vector();
  default:
    return false;
  }
}
19246
// FIXME: add more patterns which are not profitable to hoist.
  if (!I->hasOneUse())
    return true;

  Instruction *User = I->user_back();
  assert(User && "A single use instruction with no uses.");

  switch (I->getOpcode()) {
  case Instruction::FMul: {
    // Don't break FMA, PowerPC prefers FMA.
    if (User->getOpcode() != Instruction::FSub &&
        User->getOpcode() != Instruction::FAdd)
      return true;

    const Function *F = I->getFunction();
    const DataLayout &DL = F->getDataLayout();
    Type *Ty = User->getOperand(0)->getType();
    // Contraction must be permitted on both the multiply and its user for
    // the pair to be fused into an FMA.
    bool AllowContract = I->getFastMathFlags().allowContract() &&
                         User->getFastMathFlags().allowContract();

    // Hoisting is unprofitable exactly when the mul + add/sub would
    // otherwise fuse into a fast FMA.
    return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
             (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
  }
  case Instruction::Load: {
    // Don't break "store (load float*)" pattern, this pattern will be combined
    // to "store (load int32)" in later InstCombine pass. See function
    // combineLoadToOperationType. On PowerPC, loading a float point takes more
    // cycles than loading a 32 bit integer.
    LoadInst *LI = cast<LoadInst>(I);
    // For the loads that combineLoadToOperationType does nothing, like
    // ordered load, it should be profitable to hoist them.
    // For swifterror load, it can only be used for pointer to pointer type, so
    // later type check should get rid of this case.
    if (!LI->isUnordered())
      return true;

    if (User->getOpcode() != Instruction::Store)
      return true;

    if (I->getType()->getTypeID() != Type::FloatTyID)
      return true;

    // A float load whose only use is a store: keep it in place.
    return false;
  }
  default:
    return true;
  }
  return true;
}
19299
const MCPhysReg *
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  // The list is null-terminated, as required by the caller.
  return ScratchRegs;
}
19312
                                                   const Constant *PersonalityFn) const {
  // The exception pointer is passed in GPR 3 (X3 in 64-bit mode, R3 in
  // 32-bit mode).
  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
}
19317
                                                   const Constant *PersonalityFn) const {
  // The exception selector is passed in GPR 4 (X4 in 64-bit mode, R4 in
  // 32-bit mode).
  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
}
19322
bool
                               EVT VT , unsigned DefinedValues) const {
  // A v2i64 build_vector is cheap only with direct GPR<->VSR moves; without
  // them it would have to round-trip through the stack.
  if (VT == MVT::v2i64)
    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves

  // With VSX, shuffle-based expansion is generally preferable.
  if (Subtarget.hasVSX())
    return true;

}
19334
  // Skip the ILP preference when it is disabled by flag or when the machine
  // scheduler is enabled for this subtarget.
  if (DisableILPPref || Subtarget.enableMachineScheduler())

  return Sched::ILP;
}
19341
// Create a fast isel object.
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
    const LibcallLoweringInfo *LibcallLowering) const {
  // Delegate to the PPC-specific FastISel factory.
  return PPC::createFastISel(FuncInfo, LibInfo, LibcallLowering);
}
19348
19349// 'Inverted' means the FMA opcode after negating one multiplicand.
19350// For example, (fma -a b c) = (fnmsub a b c)
19351static unsigned invertFMAOpcode(unsigned Opc) {
19352 switch (Opc) {
19353 default:
19354 llvm_unreachable("Invalid FMA opcode for PowerPC!");
19355 case ISD::FMA:
19356 return PPCISD::FNMSUB;
19357 case PPCISD::FNMSUB:
19358 return ISD::FMA;
19359 }
19360}
19361
                                                 bool LegalOps, bool OptForSize,
                                                 unsigned Depth) const {
    return SDValue();

  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();
  SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Opc) {
  case PPCISD::FNMSUB:
    // Only rewrite single-use FNMSUB nodes over legal types.
    if (!Op.hasOneUse() || !isTypeLegal(VT))
      break;

    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    SDValue N2 = Op.getOperand(2);
    SDLoc Loc(Op);

    // Every rewrite below needs the addend negated; bail out if that fails.
    SDValue NegN2 =
        getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);

    if (!NegN2)
      return SDValue();

    // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
    // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
    // These transformations may change sign of zeroes. For example,
    // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
    if (Flags.hasNoSignedZeros()) {
      // Try and choose the cheaper one to negate.
      SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
                                           N0Cost, Depth + 1);

      SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
                                           N1Cost, Depth + 1);

      if (NegN0 && N0Cost <= N1Cost) {
        Cost = std::min(N0Cost, N2Cost);
        return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
      } else if (NegN1) {
        Cost = std::min(N1Cost, N2Cost);
        return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
      }
    }

    // (fneg (fnmsub a b c)) => (fma a b (fneg c))
    if (isOperationLegal(ISD::FMA, VT)) {
      Cost = N2Cost;
      return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
    }

    break;
  }

  // No PPC-specific rewrite applied; fall back to the generic handling.
  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
                                              Cost, Depth);
}
19425
// Override to enable LOAD_STACK_GUARD lowering on Linux.
  // A TLS-based stack-protector guard, or any Linux target, uses the
  // LOAD_STACK_GUARD node.
  if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
    return true;
}
19432
                                     bool ForCodeSize) const {
  // Materializing FP immediates without a constant-pool load requires VSX.
  if (!VT.isSimple() || !Subtarget.hasVSX())
    return false;

  switch(VT.getSimpleVT().SimpleTy) {
  default:
    // For FP types that are currently not supported by PPC backend, return
    // false. Examples: f16, f80.
    return false;
  case MVT::f32:
  case MVT::f64: {
    if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
      // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
      return true;
    }
    bool IsExact;
    APSInt IntResult(16, false);
    // The rounding mode doesn't really matter because we only care about floats
    // that can be converted to integers exactly.
    Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
    // For exact values in the range [-16, 15] we can materialize the float.
    if (IsExact && IntResult <= 15 && IntResult >= -16)
      return true;
    // Zero is also cheap to materialize.
    return Imm.isZero();
  }
  case MVT::ppcf128:
    // Only positive zero is cheap for the ppc_fp128 pair.
    return Imm.isPosZero();
  }
}
19463
// For vector shift operation op, fold
// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
                                   SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  unsigned Opcode = N->getOpcode();
  unsigned TargetOpcode;

  // Map the generic shift opcode to the corresponding PPC target node.
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected shift operation");
  case ISD::SHL:
    TargetOpcode = PPCISD::SHL;
    break;
  case ISD::SRL:
    TargetOpcode = PPCISD::SRL;
    break;
  case ISD::SRA:
    TargetOpcode = PPCISD::SRA;
    break;
  }

  // A mask of exactly (numbits - 1) on the shift amount is a no-op for the
  // target shift node, so it can be dropped.
  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
      N1->getOpcode() == ISD::AND)
    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
      if (Mask->getZExtValue() == OpSizeInBits - 1)
        return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));

  return SDValue();
}
19497
SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  EVT VT = N->getValueType(0);
  assert(VT.isVector() && "Vector type expected.");

  unsigned Opc = N->getOpcode();
  assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
         "Unexpected opcode.");

  if (!isOperationLegal(Opc, VT))
    return SDValue();

  // Only word and doubleword element types are handled here.
  EVT EltTy = VT.getScalarType();
  unsigned EltBits = EltTy.getSizeInBits();
  if (EltTy != MVT::i64 && EltTy != MVT::i32)
    return SDValue();

  // Extract the splatted shift amount, either from a VADD_SPLAT node or
  // from a constant BUILD_VECTOR splat.
  SDValue N1 = N->getOperand(1);
  uint64_t SplatBits = 0;
  bool AddSplatCase = false;
  unsigned OpcN1 = N1.getOpcode();
  if (OpcN1 == PPCISD::VADD_SPLAT &&
    AddSplatCase = true;
    SplatBits = N1.getConstantOperandVal(0);
  }

  if (!AddSplatCase) {
    if (OpcN1 != ISD::BUILD_VECTOR)
      return SDValue();

    unsigned SplatBitSize;
    bool HasAnyUndefs;
    APInt APSplatBits, APSplatUndef;
    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
    bool BVNIsConstantSplat =
        BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, 0, !Subtarget.isLittleEndian());
    // The splat width must match the element width exactly.
    if (!BVNIsConstantSplat || SplatBitSize != EltBits)
      return SDValue();
    SplatBits = APSplatBits.getZExtValue();
  }

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  // PPC vector shifts by word/double look at only the low 5/6 bits of the
  // shift vector, which means the max value is 31/63. A shift vector of all
  // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
  // -16 to 15 range.
  if (SplatBits == (EltBits - 1)) {
    unsigned NewOpc;
    switch (Opc) {
    case ISD::SHL:
      NewOpc = PPCISD::SHL;
      break;
    case ISD::SRL:
      NewOpc = PPCISD::SRL;
      break;
    case ISD::SRA:
      NewOpc = PPCISD::SRA;
      break;
    }
    // Splatting 255 in each byte yields an all-ones vector.
    SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
    return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
  }

  if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
    return SDValue();

  // For 64-bit there is no splat immediate so we want to catch shift by 1 here
  // before the BUILD_VECTOR is replaced by a load.
  if (EltTy != MVT::i64 || SplatBits != 1)
    return SDValue();

  // shl x, 1 == add x, x.
  return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
}
19574
19575SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
19576 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19577 return Value;
19578
19579 if (N->getValueType(0).isVector())
19580 return combineVectorShift(N, DCI);
19581
19582 SDValue N0 = N->getOperand(0);
19583 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19584 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
19585 N0.getOpcode() != ISD::SIGN_EXTEND ||
19586 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
19587 N->getValueType(0) != MVT::i64)
19588 return SDValue();
19589
19590 // We can't save an operation here if the value is already extended, and
19591 // the existing shift is easier to combine.
19592 SDValue ExtsSrc = N0.getOperand(0);
19593 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19594 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
19595 return SDValue();
19596
19597 SDLoc DL(N0);
19598 SDValue ShiftBy = SDValue(CN1, 0);
19599 // We want the shift amount to be i32 on the extswli, but the shift could
19600 // have an i64.
19601 if (ShiftBy.getValueType() == MVT::i64)
19602 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
19603
19604 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
19605 ShiftBy);
19606}
19607
19608SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19609 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19610 return Value;
19611
19612 if (N->getValueType(0).isVector())
19613 return combineVectorShift(N, DCI);
19614
19615 return SDValue();
19616}
19617
19618SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19619 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19620 return Value;
19621
19622 if (N->getValueType(0).isVector())
19623 return combineVectorShift(N, DCI);
19624
19625 return SDValue();
19626}
19627
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the equation (addi Z, -C) can be simplified to Z
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
                                 const PPCSubtarget &Subtarget) {
  // The carry-based sequence below is only available in 64-bit mode.
  if (!Subtarget.isPPC64())
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Match (zext (setcc i64:Z, C, cc)) where -C fits in a 16-bit signed
  // immediate (the addi constraint).
  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)
      return false;

    SDValue Cmp = Op.getOperand(0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(0).getValueType() != MVT::i64)
      return false;

    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be [-32768, 32767].
      return isInt<16>(NegConstant);
    }

    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(LHS, RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  SDLoc DL(N);
  // The carry is modeled as i1 when CR bits are in use, i32 otherwise.
  EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
  SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
  SDValue Cmp = RHS.getOperand(0);
  SDValue Z = Cmp.getOperand(0);
  auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
  default: break;
  case ISD::SETNE: {
    //                             when C == 0
    //                          --> addze X, (addic Z, -1).carry
    //                         /
    // add X, (zext(setne Z, C))
    //                         \   when -32768 <= -C <= 32767 && C != 0
    //                          --> addze X, (addic (addi Z, -C), -1).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    // (Z - C) + (-1) carries exactly when Z != C.
    SDValue Addc =
        DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
                    AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
                    DAG.getConstant(0, DL, CarryType));
    return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
                       DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Addc.getNode(), 1));
  }
  case ISD::SETEQ: {
    //                             when C == 0
    //                          --> addze X, (subfic Z, 0).carry
    //                         /
    // add X, (zext(sete Z, C))
    //                         \   when -32768 <= -C <= 32767 && C != 0
    //                          --> addze X, (subfic (addi Z, -C), 0).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Subc =
        DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
                    DAG.getConstant(0, DL, MVT::i64), AddOrZ,
                    DAG.getConstant(0, DL, CarryType));
    // Invert the borrow to obtain the carry consumed by the outer addze.
    SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
                                 DAG.getConstant(1UL, DL, CarryType));
    return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
                       DAG.getConstant(0, DL, MVT::i64), Invert);
  }
  }

  return SDValue();
}
19720
// Transform
// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
// In this case both C1 and C2 must be known constants.
// C1+C2 must fit into a 34 bit signed integer.
                                          const PPCSubtarget &Subtarget) {
  // This fold only applies when PC-relative addressing is in use.
  if (!Subtarget.isUsingPCRelativeCalls())
    return SDValue();

  // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
  // If we find that node try to cast the Global Address and the Constant.
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Canonicalize so the MAT_PCREL_ADDR node (if any) ends up on the LHS.
  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
    std::swap(LHS, RHS);

  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
    return SDValue();

  // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.

  // Check that both casts succeeded.
  if (!GSDN || !ConstNode)
    return SDValue();

  int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
  SDLoc DL(GSDN);

  // The signed int offset needs to fit in 34 bits.
  if (!isInt<34>(NewOffset))
    return SDValue();

  // The new global address is a copy of the old global address except
  // that it has the updated Offset.
  SDValue GA =
      DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
                                 NewOffset, GSDN->getTargetFlags());
  SDValue MatPCRel =
      DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
  return MatPCRel;
}
19766
// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
// Mathematical identity: X + 1 = X - (-1)
// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
// Requirement: VSX feature for efficient xxleqv generation
                                 const PPCSubtarget &Subtarget) {

  EVT VT = N->getValueType(0);
  // VSX is required so the all-ones vector is cheap to generate.
  if (!Subtarget.hasVSX())
    return SDValue();

  // Handle v2i64, v4i32, v8i16 and v16i8 types
  if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
        VT == MVT::v2i64))
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Check if RHS is BUILD_VECTOR
  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // Check if all the elements are 1
  unsigned NumOfEles = RHS.getNumOperands();
  for (unsigned i = 0; i < NumOfEles; ++i) {
    auto *CN = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
    if (!CN || CN->getSExtValue() != 1)
      return SDValue();
  }
  SDLoc DL(N);

  // Build a v4i32 all-ones vector; 4 x 0xFFFFFFFF covers the full 128 bits.
  SDValue MinusOne = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
  SmallVector<SDValue, 4> Ops(4, MinusOne);
  SDValue AllOnesVec = DAG.getBuildVector(MVT::v4i32, DL, Ops);

  // Bitcast to the target vector type
  SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, AllOnesVec);

  // X + 1 == X - (-1).
  return DAG.getNode(ISD::SUB, DL, VT, LHS, Bitcast);
}
19809
19810SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19811 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
19812 return Value;
19813
19814 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
19815 return Value;
19816
19817 if (auto Value = combineADDToSUB(N, DCI.DAG, Subtarget))
19818 return Value;
19819 return SDValue();
19820}
19821
19822// Detect TRUNCATE operations on bitcasts of float128 values.
19823// What we are looking for here is the situtation where we extract a subset
19824// of bits from a 128 bit float.
19825// This can be of two forms:
19826// 1) BITCAST of f128 feeding TRUNCATE
19827// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
19828// The reason this is required is because we do not have a legal i128 type
19829// and so we want to prevent having to store the f128 and then reload part
19830// of it.
19831SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
19832 DAGCombinerInfo &DCI) const {
19833 // If we are using CRBits then try that first.
19834 if (Subtarget.useCRBits()) {
19835 // Check if CRBits did anything and return that if it did.
19836 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
19837 return CRTruncValue;
19838 }
19839
19840 SDLoc dl(N);
19841 SDValue Op0 = N->getOperand(0);
19842
19843 // Looking for a truncate of i128 to i64.
19844 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
19845 return SDValue();
19846
19847 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
19848
19849 // SRL feeding TRUNCATE.
19850 if (Op0.getOpcode() == ISD::SRL) {
19851 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
19852 // The right shift has to be by 64 bits.
19853 if (!ConstNode || ConstNode->getZExtValue() != 64)
19854 return SDValue();
19855
19856 // Switch the element number to extract.
19857 EltToExtract = EltToExtract ? 0 : 1;
19858 // Update Op0 past the SRL.
19859 Op0 = Op0.getOperand(0);
19860 }
19861
19862 // BITCAST feeding a TRUNCATE possibly via SRL.
19863 if (Op0.getOpcode() == ISD::BITCAST &&
19864 Op0.getValueType() == MVT::i128 &&
19865 Op0.getOperand(0).getValueType() == MVT::f128) {
19866 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
19867 return DCI.DAG.getNode(
19868 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
19869 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
19870 }
19871 return SDValue();
19872}
19873
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Only multiplies by a constant (or a constant splat) are decomposed.
  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
  if (!ConstOpOrElement)
    return SDValue();

  // An imul is usually smaller than the alternative sequence for legal type.
      isOperationLegal(ISD::MUL, N->getValueType(0)))
    return SDValue();

  // Whether the shift/add expansion beats mul depends on the per-CPU cycle
  // ratios between the operations (see the tables below).
  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getCPUDirective()) {
    default:
      // TODO: enhance the condition for subtarget before pwr8
      return false;
    case PPC::DIR_PWR8:
      //  type        mul     add    shl
      // scalar        4       1      1
      // vector        7       2      2
      return true;
    case PPC::DIR_PWR9:
    case PPC::DIR_PWR10:
    case PPC::DIR_PWR11:
      //  type        mul     add    shl
      // scalar        5       2      2
      // vector        7       2      2

      // The cycle RATIO of related operations are showed as a table above.
      // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
      // scalar and vector type. For 2 instrs patterns, add/sub + shl
      // are 4, it is always profitable; but for 3 instrs patterns
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
      // So we should only do it for vector type.
      return IsAddOne && IsNeg ? VT.isVector() : true;
    }
  };

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)

    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);

    if (!IsNeg)
      return Res;

    // Negate the result for the -(2^N + 1) case.
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))

    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));

    if (!IsNeg)
      return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
    else
      return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);

  } else {
    return SDValue();
  }
}
19959
// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
// in combiner since we need to check SD flags and other subtarget features.
SDValue PPCTargetLowering::combineFMALike(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDNodeFlags Flags = N->getFlags();
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opc = N->getOpcode();
  bool LegalOps = !DCI.isBeforeLegalizeOps();
  SDLoc Loc(N);

  // The rewrites below require FMA to be legal for this type.
  if (!isOperationLegal(ISD::FMA, VT))
    return SDValue();

  // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
  // since (fnmsub a b c)=-0 while c-ab=+0.
  if (!Flags.hasNoSignedZeros())
    return SDValue();

  // (fma (fneg a) b c) => (fnmsub a b c)
  // (fnmsub (fneg a) b c) => (fma a b c)
  if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);

  // (fma a (fneg b) c) => (fnmsub a b c)
  // (fnmsub a (fneg b) c) => (fma a b c)
  if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);

  return SDValue();
}
19995
19996bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19997 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
19998 if (!Subtarget.is64BitELFABI())
19999 return false;
20000
20001 // If not a tail call then no need to proceed.
20002 if (!CI->isTailCall())
20003 return false;
20004
20005 // If sibling calls have been disabled and tail-calls aren't guaranteed
20006 // there is no reason to duplicate.
20007 auto &TM = getTargetMachine();
20008 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
20009 return false;
20010
20011 // Can't tail call a function called indirectly, or if it has variadic args.
20012 const Function *Callee = CI->getCalledFunction();
20013 if (!Callee || Callee->isVarArg())
20014 return false;
20015
20016 // Make sure the callee and caller calling conventions are eligible for tco.
20017 const Function *Caller = CI->getParent()->getParent();
20018 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
20019 CI->getCallingConv()))
20020 return false;
20021
20022 // If the function is local then we have a good chance at tail-calling it
20023 return getTargetMachine().shouldAssumeDSOLocal(Callee);
20024}
20025
20026bool PPCTargetLowering::
20027isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
20028 const Value *Mask = AndI.getOperand(1);
20029 // If the mask is suitable for andi. or andis. we should sink the and.
20030 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
20031 // Can't handle constants wider than 64-bits.
20032 if (CI->getBitWidth() > 64)
20033 return false;
20034 int64_t ConstVal = CI->getZExtValue();
20035 return isUInt<16>(ConstVal) ||
20036 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
20037 }
20038
20039 // For non-constant masks, we can always use the record-form and.
20040 return true;
20041}
20042
20043/// getAddrModeForFlags - Based on the set of address flags, select the most
20044/// optimal instruction format to match by.
20045PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
20046 // This is not a node we should be handling here.
20047 if (Flags == PPC::MOF_None)
20048 return PPC::AM_None;
20049 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
20050 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
20051 if ((Flags & FlagSet) == FlagSet)
20052 return PPC::AM_DForm;
20053 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
20054 if ((Flags & FlagSet) == FlagSet)
20055 return PPC::AM_DSForm;
20056 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
20057 if ((Flags & FlagSet) == FlagSet)
20058 return PPC::AM_DQForm;
20059 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
20060 if ((Flags & FlagSet) == FlagSet)
20061 return PPC::AM_PrefixDForm;
20062 // If no other forms are selected, return an X-Form as it is the most
20063 // general addressing mode.
20064 return PPC::AM_XForm;
20065}
20066
/// Set alignment flags based on whether or not the Frame Index is aligned.
/// Utilized when computing flags for address computation when selecting
/// load and store instructions.
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
                               SelectionDAG &DAG) {
  // NOTE(review): OR is treated like ADD here — presumably callers only pass
  // ORs proven disjoint (see provablyDisjointOr in the caller); confirm.
  bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
  if (!FI)
    return;
  unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
  // If this is (add $FI, $S16Imm), the alignment flags are already set
  // based on the immediate. We just need to clear the alignment flags
  // if the FI alignment is weaker.
  if ((FrameIndexAlign % 4) != 0)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
  if ((FrameIndexAlign % 16) != 0)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
  // If the address is a plain FrameIndex, set alignment flags based on
  // FI alignment.
  if (!IsAdd) {
    if ((FrameIndexAlign % 4) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
    if ((FrameIndexAlign % 16) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  }
}
20094
/// Given a node, compute flags that are used for address computation when
/// selecting load and store instructions. The flags computed are stored in
/// FlagSet. This function takes into account whether the node is a constant,
/// an ADD, OR, or a constant, and computes the address flags accordingly.
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
                                              SelectionDAG &DAG) {
  // Set the alignment flags for the node depending on if the node is
  // 4-byte or 16-byte aligned.
  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
    if ((Imm & 0x3) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
    if ((Imm & 0xf) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  };

  // Constant-address case. NOTE(review): the guarding dyn_cast that binds
  // CN is on a line not visible in this excerpt.
    // All 32-bit constants can be computed as LIS + Disp.
    const APInt &ConstImm = CN->getAPIntValue();
    if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
      FlagSet |= PPC::MOF_AddrIsSImm32;
      SetAlignFlagsForImm(ConstImm.getZExtValue());
      setAlignFlagsForFI(N, FlagSet, DAG);
    }
    if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
      FlagSet |= PPC::MOF_RPlusSImm34;
    else // Let constant materialization handle large constants.
      FlagSet |= PPC::MOF_NotAddNorCst;
  } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
    // This address can be represented as an addition of:
    // - Register + Imm16 (possibly a multiple of 4/16)
    // - Register + Imm34
    // - Register + PPCISD::Lo
    // - Register + Register
    // In any case, we won't have to match this as Base + Zero.
    SDValue RHS = N.getOperand(1);
    // NOTE(review): the dyn_cast binding CN to RHS is on a line not visible
    // in this excerpt.
      const APInt &ConstImm = CN->getAPIntValue();
      if (ConstImm.isSignedIntN(16)) {
        FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
        SetAlignFlagsForImm(ConstImm.getZExtValue());
        setAlignFlagsForFI(N, FlagSet, DAG);
      }
      if (ConstImm.isSignedIntN(34))
        FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
      else
        FlagSet |= PPC::MOF_RPlusR; // Register.
    } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
      FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
    else
      FlagSet |= PPC::MOF_RPlusR;
  } else { // The address computation is not a constant or an addition.
    setAlignFlagsForFI(N, FlagSet, DAG);
    FlagSet |= PPC::MOF_NotAddNorCst;
  }
}
20150
/// Returns true if N is an address materialized PC-relatively.
/// NOTE(review): only the first opcode comparison (PPCISD::MAT_PCREL_ADDR)
/// is visible in this excerpt; the rest of the disjunction is truncated.
static bool isPCRelNode(SDValue N) {
  return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
}
20158
/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
/// \param Parent the memory operation (load/store/intrinsic) being matched.
/// \param N the address operand of that memory operation.
/// \returns a bitmask of PPC::MOF_* flags describing subtarget features,
/// in-memory type, extension kind and address shape.
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
                                           SelectionDAG &DAG) const {
  unsigned FlagSet = PPC::MOF_None;

  // Compute subtarget flags.
  if (!Subtarget.hasP9Vector())
    FlagSet |= PPC::MOF_SubtargetBeforeP9;
  else
    FlagSet |= PPC::MOF_SubtargetP9;

  if (Subtarget.hasPrefixInstrs())
    FlagSet |= PPC::MOF_SubtargetP10;

  if (Subtarget.hasSPE())
    FlagSet |= PPC::MOF_SubtargetSPE;

  // Check if we have a PCRel node and return early.
  if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
    return FlagSet;

  // If the node is the paired load/store intrinsics, compute flags for
  // address computation and return early.
  unsigned ParentOp = Parent->getOpcode();
  if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
                               (ParentOp == ISD::INTRINSIC_VOID))) {
    unsigned ID = Parent->getConstantOperandVal(1);
    if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
      // The address operand index differs between the load and the store
      // form of the paired-vector intrinsic.
      SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
                             ? Parent->getOperand(2)
                             : Parent->getOperand(3);
      computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
      FlagSet |= PPC::MOF_Vector;
      return FlagSet;
    }
  }

  // Mark this as something we don't want to handle here if it is atomic
  // or pre-increment instruction.
  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
    if (LSB->isIndexed())
      return PPC::MOF_None;

  // Compute in-memory type flags. This is based on if there are scalars,
  // floats or vectors.
  const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
  assert(MN && "Parent should be a MemSDNode!");
  EVT MemVT = MN->getMemoryVT();
  unsigned Size = MemVT.getSizeInBits();
  if (MemVT.isScalarInteger()) {
    assert(Size <= 128 &&
           "Not expecting scalar integers larger than 16 bytes!");
    if (Size < 32)
      FlagSet |= PPC::MOF_SubWordInt;
    else if (Size == 32)
      FlagSet |= PPC::MOF_WordInt;
    else
      FlagSet |= PPC::MOF_DoubleWordInt;
  } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
    if (Size == 128)
      FlagSet |= PPC::MOF_Vector;
    else if (Size == 256) {
      assert(Subtarget.pairedVectorMemops() &&
             "256-bit vectors are only available when paired vector memops is "
             "enabled!");
      FlagSet |= PPC::MOF_Vector;
    } else
      llvm_unreachable("Not expecting illegal vectors!");
  } else { // Floating point type: can be scalar, f128 or vector types.
    if (Size == 32 || Size == 64)
      FlagSet |= PPC::MOF_ScalarFloat;
    else if (MemVT == MVT::f128 || MemVT.isVector())
      FlagSet |= PPC::MOF_Vector;
    else
      llvm_unreachable("Not expecting illegal scalar floats!");
  }

  // Compute flags for address computation.
  computeFlagsForAddressComputation(N, FlagSet, DAG);

  // Compute type extension flags.
  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
    switch (LN->getExtensionType()) {
    case ISD::SEXTLOAD:
      FlagSet |= PPC::MOF_SExt;
      break;
    case ISD::EXTLOAD:
    case ISD::ZEXTLOAD:
      FlagSet |= PPC::MOF_ZExt;
      break;
    case ISD::NON_EXTLOAD:
      FlagSet |= PPC::MOF_NoExt;
      break;
    }
  } else
    FlagSet |= PPC::MOF_NoExt;

  // For integers, no extension is the same as zero extension.
  // We set the extension mode to zero extension so we don't have
  // to add separate entries in AddrModesMap for loads and stores.
  if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
    FlagSet |= PPC::MOF_ZExt;
    FlagSet &= ~PPC::MOF_NoExt;
  }

  // If we don't have prefixed instructions, 34-bit constants should be
  // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
  // NOTE(review): the middle of this initializer is on a line not visible
  // in this excerpt.
  bool IsNonP1034BitConst =
      FlagSet) == PPC::MOF_RPlusSImm34;
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
      IsNonP1034BitConst)
    FlagSet |= PPC::MOF_NotAddNorCst;

  return FlagSet;
}
20276
/// SelectForceXFormMode - Given the specified address, force it to be
/// represented as an indexed [r+r] operation (an XForm instruction).
/// Sets Base/Disp to the two registers of the [r+r] pair and returns the
/// selected mode. NOTE(review): the first line of the signature and the
/// initialization of Mode are on lines not visible in this excerpt.
                                                  SDValue &Base,
                                                  SelectionDAG &DAG) const {

  int16_t ForceXFormImm = 0;
  // A provably-disjoint OR whose RHS is not a 16-bit immediate behaves like
  // a plain register+register add, so split it directly.
  if (provablyDisjointOr(DAG, N) &&
      !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
    return Mode;
  }

  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add. However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register. We only get rid of the add if it is not an add of a
  // value and a 16-bit signed constant and both have a single use.
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
    return Mode;
  }

  // Otherwise, use R0 as the base register.
  Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Base = N;

  return Mode;
}
20312
// Splits Val into NumParts parts of type PartVT. Returns true if the custom
// split was performed, false to fall back to the generic splitting logic.
// NOTE(review): the first line of the signature is not visible in this
// excerpt.
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  EVT ValVT = Val.getValueType();
  // If we are splitting a scalar integer into f64 parts (i.e. so they
  // can be placed into VFRC registers), we need to zero extend and
  // bitcast the values. This will ensure the value is placed into a
  // VSR using direct moves or stack operations as needed.
  if (PartVT == MVT::f64 &&
      (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
    Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
    Parts[0] = Val;
    return true;
  }
  return false;
}
20330
// Lower the node Op to a call to the external library function LibCallName,
// forwarding Op's operands as call arguments and returning the call result.
// Used to redirect math operations to MASS library entry points.
// NOTE(review): the declarations of CLI and Args, and one trailing builder
// call, are on lines not visible in this excerpt.
SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
                                          SelectionDAG &DAG) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT RetVT = Op.getValueType();
  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  SDValue Callee =
      DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
  // Whether the result should be sign-extended drives the extension choice
  // for each argument below.
  bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
  // Forward every operand of Op as a call argument with the appropriate
  // extension attribute.
  for (const SDValue &N : Op->op_values()) {
    EVT ArgVT = N.getValueType();
    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
    TargetLowering::ArgListEntry Entry(N, ArgTy);
    Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
    Entry.IsZExt = !Entry.IsSExt;
    Args.push_back(Entry);
  }

  SDValue InChain = DAG.getEntryNode();
  SDValue TCChain = InChain;
  const Function &F = DAG.getMachineFunction().getFunction();
  // Tail-call only when the node is in tail position and the return type
  // matches (or the caller returns void).
  bool isTailCall =
      TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
      (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
  if (isTailCall)
    InChain = TCChain;
  CLI.setDebugLoc(SDLoc(Op))
      .setChain(InChain)
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setTailCall(isTailCall)
      .setSExtResult(SignExtend)
      .setZExtResult(!SignExtend)
  return TLI.LowerCallTo(CLI).first;
}
20367
20368SDValue PPCTargetLowering::lowerLibCallBasedOnType(
20369 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
20370 SelectionDAG &DAG) const {
20371 if (Op.getValueType() == MVT::f32)
20372 return lowerToLibCall(LibCallFloatName, Op, DAG);
20373
20374 if (Op.getValueType() == MVT::f64)
20375 return lowerToLibCall(LibCallDoubleName, Op, DAG);
20376
20377 return SDValue();
20378}
20379
20380bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
20381 SDNodeFlags Flags = Op.getNode()->getFlags();
20382 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
20383 Flags.hasNoNaNs() && Flags.hasNoInfs();
20384}
20385
20386bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
20387 return Op.getNode()->getFlags().hasApproximateFuncs();
20388}
20389
// Whether conversion of scalar math calls to MASS entry points is enabled.
// NOTE(review): the body's return statement is on a line not visible in
// this excerpt.
bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
}
20393
20394SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
20395 const char *LibCallFloatName,
20396 const char *LibCallDoubleNameFinite,
20397 const char *LibCallFloatNameFinite,
20398 SDValue Op,
20399 SelectionDAG &DAG) const {
20400 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
20401 return SDValue();
20402
20403 if (!isLowringToMASSFiniteSafe(Op))
20404 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
20405 DAG);
20406
20407 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
20408 LibCallDoubleNameFinite, Op, DAG);
20409}
20410
20411SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
20412 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
20413 "__xl_powf_finite", Op, DAG);
20414}
20415
20416SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
20417 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
20418 "__xl_sinf_finite", Op, DAG);
20419}
20420
20421SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
20422 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
20423 "__xl_cosf_finite", Op, DAG);
20424}
20425
20426SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
20427 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
20428 "__xl_logf_finite", Op, DAG);
20429}
20430
20431SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
20432 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
20433 "__xl_log10f_finite", Op, DAG);
20434}
20435
20436SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
20437 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
20438 "__xl_expf_finite", Op, DAG);
20439}
20440
// If we happen to match to an aligned D-Form, check if the Frame Index is
// adequately aligned. If it is not, reset the mode to match to X-Form.
// NOTE(review): several lines of this function (the signature tail, the
// frame-index guard, the DQ-Form half of the condition and the assignment
// to Mode) are not visible in this excerpt.
static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
    return;
  if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
}
20451
/// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),
/// compute the address flags of the node, get the optimal address mode based
/// on the flags, and set the Base and Disp based on the address mode.
/// \returns the chosen PPC::AddrMode; Base and Disp are output parameters.
/// NOTE(review): the first signature line and a handful of interior lines
/// (frame-index casts and an assert block) are not visible in this excerpt.
                                               SDValue N, SDValue &Disp,
                                               SDValue &Base,
                                               SelectionDAG &DAG,
                                               MaybeAlign Align) const {
  SDLoc DL(Parent);

  // Compute the address flags.
  unsigned Flags = computeMOFlags(Parent, N, DAG);

  // Get the optimal address mode based on the Flags.
  PPC::AddrMode Mode = getAddrModeForFlags(Flags);

  // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
  // Select an X-Form load if it is not.
  setXFormForUnalignedFI(N, Flags, Mode);

  // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
  if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
    assert(Subtarget.isUsingPCRelativeCalls() &&
           "Must be using PC-Relative calls when a valid PC-Relative node is "
           "present!");
    Mode = PPC::AM_PCRel;
  }

  // Set Base and Disp accordingly depending on the address mode.
  switch (Mode) {
  case PPC::AM_DForm:
  case PPC::AM_DSForm:
  case PPC::AM_DQForm: {
    // This is a register plus a 16-bit immediate. The base will be the
    // register and the displacement will be the immediate unless it
    // isn't sufficiently aligned.
    if (Flags & PPC::MOF_RPlusSImm16) {
      SDValue Op0 = N.getOperand(0);
      SDValue Op1 = N.getOperand(1);
      int16_t Imm = Op1->getAsZExtVal();
      if (!Align || isAligned(*Align, Imm)) {
        Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
        Base = Op0;
      // NOTE(review): the else-if branch head binding FI is on a line not
      // visible in this excerpt.
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        }
        break;
      }
    }
    // This is a register plus the @lo relocation. The base is the register
    // and the displacement is the global address.
    else if (Flags & PPC::MOF_RPlusLo) {
      Disp = N.getOperand(1).getOperand(0); // The global address.
      Base = N.getOperand(0);
      break;
    }
    // This is a constant address at most 32 bits. The base will be
    // zero or load-immediate-shifted and the displacement will be
    // the low 16 bits of the address.
    else if (Flags & PPC::MOF_AddrIsSImm32) {
      auto *CN = cast<ConstantSDNode>(N);
      EVT CNType = CN->getValueType(0);
      uint64_t CNImm = CN->getZExtValue();
      // If this address fits entirely in a 16-bit sext immediate field, codegen
      // this as "d, 0".
      int16_t Imm;
      if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
        Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
        Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                               CNType);
        break;
      }
      // Handle 32-bit sext immediate with LIS + Addr mode.
      if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
          (!Align || isAligned(*Align, CNImm))) {
        int32_t Addr = (int32_t)CNImm;
        // Otherwise, break this down into LIS + Disp.
        Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
        // Subtracting the sign-extended low half before shifting corrects
        // for the borrow the low 16 bits would otherwise introduce.
        Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
                                           MVT::i32);
        uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
        break;
      }
    }
    // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
    Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
    // NOTE(review): the frame-index guard binding FI is on a line not
    // visible in this excerpt.
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
    } else
      Base = N;
    break;
  }
  case PPC::AM_PrefixDForm: {
    int64_t Imm34 = 0;
    unsigned Opcode = N.getOpcode();
    if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
        (isIntS34Immediate(N.getOperand(1), Imm34))) {
      // N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
      Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      else
        Base = N.getOperand(0);
    } else if (isIntS34Immediate(N, Imm34)) {
      // The address is a 34-bit signed immediate.
      Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
      Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
    }
    break;
  }
  case PPC::AM_PCRel: {
    // When selecting PC-Relative instructions, "Base" is not utilized as
    // we select the address as [PC+imm].
    Disp = N;
    break;
  }
  case PPC::AM_None:
    break;
  default: { // By default, X-Form is always available to be selected.
    // When a frame index is not aligned, we also match by XForm.
    Base = FI ? N : N.getOperand(1);
    Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                                N.getValueType())
              : N.getOperand(0);
    break;
  }
  }
  return Mode;
}
20589
// Select the calling-convention assignment function for PPC64 ELF: cold
// calls use the cold return convention for return values; everything else
// (including cold argument passing) uses the standard CC_PPC64_ELF.
// NOTE(review): the first line of the signature is not visible in this
// excerpt.
                                      bool Return,
                                      bool IsVarArg) const {
  switch (CC) {
  case CallingConv::Cold:
    return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
  default:
    return CC_PPC64_ELF;
  }
}
20600
  // Quadword (128-bit) atomics are inlined only on 64-bit subtargets that
  // provide the quadword atomic instructions. NOTE(review): the enclosing
  // function signature is not visible in this excerpt.
  return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
}
20604
  // Decide how an atomicrmw should be expanded based on its width and
  // operation. NOTE(review): the signature, the return statements and the
  // switch case labels are on lines not visible in this excerpt.
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  // 128-bit operations are handled via the quadword-atomic path when the
  // subtarget supports inlining them.
  if (shouldInlineQuadwordAtomics() && Size == 128)

  switch (AI->getOperation()) {
  default:
  }

  llvm_unreachable("unreachable atomicrmw operation");
}
20623
20632
// Map a 128-bit atomicrmw binary operation to the corresponding PPC
// quadword atomic intrinsic. NOTE(review): the second signature line and
// the Xchg/Nand case labels are on lines not visible in this excerpt.
static Intrinsic::ID
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
    return Intrinsic::ppc_atomicrmw_xchg_i128;
  case AtomicRMWInst::Add:
    return Intrinsic::ppc_atomicrmw_add_i128;
  case AtomicRMWInst::Sub:
    return Intrinsic::ppc_atomicrmw_sub_i128;
  case AtomicRMWInst::And:
    return Intrinsic::ppc_atomicrmw_and_i128;
  case AtomicRMWInst::Or:
    return Intrinsic::ppc_atomicrmw_or_i128;
  case AtomicRMWInst::Xor:
    return Intrinsic::ppc_atomicrmw_xor_i128;
    return Intrinsic::ppc_atomicrmw_nand_i128;
  }
}
20654
// Emit IR for a 128-bit atomicrmw: split the increment into low/high 64-bit
// halves, call the matching ppc_atomicrmw_*_i128 intrinsic, then reassemble
// the {lo, hi} result into a single 128-bit value.
// NOTE(review): the first signature line and part of the CreateIntrinsic
// call (the intrinsic-ID argument) are not visible in this excerpt.
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = Incr->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  // Split the 128-bit increment into two i64 halves for the intrinsic.
  Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
  Value *IncrHi =
      Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
  Value *LoHi = Builder.CreateIntrinsic(
      {AlignedAddr, IncrLo, IncrHi});
  // Recombine the returned {lo, hi} pair into one 128-bit value.
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
20676
// Emit IR for a 128-bit cmpxchg: split the compare and new values into
// low/high i64 halves, call ppc_cmpxchg_i128 bracketed by the required
// fences, and reassemble the {lo, hi} result into one 128-bit value.
// NOTE(review): the first signature line is not visible in this excerpt.
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = CmpVal->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *IntCmpXchg =
      Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  // Split the expected and replacement values into i64 halves.
  Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
  Value *CmpHi =
      Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
  Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
  Value *NewHi =
      Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
  // The intrinsic itself is unordered; the leading/trailing fences supply
  // the ordering semantics requested by Ord.
  emitLeadingFence(Builder, CI, Ord);
  Value *LoHi =
      Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
  emitTrailingFence(Builder, CI, Ord);
  // Recombine the returned {lo, hi} pair into one 128-bit value.
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
20704
  // NOTE(review): the enclosing function signature is not visible in this
  // excerpt; the body reports whether the subtarget uses CR bit registers.
  return Subtarget.useCRBits();
}
20708
20709/// Shuffle masks for vectors of bits are not legal as such vectors are
20710/// reserved for MMA/DM.
20711bool PPCTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const {
20712 if (VT.getScalarType() == MVT::i1)
20713 return false;
20714 return TargetLowering::isShuffleMaskLegal(Mask, VT);
20715}
20716
20717// Optimize the following patterns using vbpermq/vbpermd:
20718// i16 = bitcast(v16i1 truncate(v16i8))
20719// i8 = bitcast(v8i1 truncate(v8i16))
20720// i8 = bitcast(v8i1 truncate(v8i8))
20721SDValue PPCTargetLowering::DAGCombineBitcast(SDNode *N,
20722 DAGCombinerInfo &DCI) const {
20723 SDValue Op0 = N->getOperand(0);
20724 if (Op0.getOpcode() != ISD::TRUNCATE)
20725 return SDValue();
20726 SDValue Src = Op0.getOperand(0);
20727 EVT ResVT = N->getValueType(0);
20728 EVT TruncResVT = Op0.getValueType();
20729 EVT SrcVT = Src.getValueType();
20730 SDLoc dl(N);
20731 SelectionDAG &DAG = DCI.DAG;
20732 bool IsLittleEndian = Subtarget.isLittleEndian();
20733
20734 if (ResVT != MVT::i16 && ResVT != MVT::i8)
20735 return SDValue();
20736 SDValue VBPerm =
20737 GenerateVBPERM(DAG, dl, Src, SrcVT, TruncResVT, IsLittleEndian);
20738 if (!VBPerm)
20739 return SDValue();
20740 SDValue ForExtract = DAG.getBitcast(MVT::v4i32, VBPerm);
20741 SDValue Extracted =
20742 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, ForExtract,
20743 DAG.getIntPtrConstant(IsLittleEndian ? 2 : 1, dl));
20744 return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Extracted);
20745}
20746
// Build a vbpermq node that gathers the most-significant bit of every
// element of Src (for the supported v16i8/v8i16/v8i8 -> bit-vector cases).
// Returns an empty SDValue for unsupported type combinations.
// NOTE(review): the declaration of BVOps is on a line not visible in this
// excerpt.
SDValue PPCTargetLowering::GenerateVBPERM(SelectionDAG &DAG, SDLoc dl,
                                          SDValue Src, EVT SrcVT, EVT ResVT,
                                          bool IsLE) const {
  bool IsV16i8 = (ResVT == MVT::v16i1 && SrcVT == MVT::v16i8);
  bool IsV8i16 = (ResVT == MVT::v8i1 && SrcVT == MVT::v8i16);
  bool IsV8i8 = (ResVT == MVT::v8i1 && SrcVT == MVT::v8i8);

  if (!IsV16i8 && !IsV8i16 && !IsV8i8)
    return SDValue();

  // Widen a 64-bit source into the low half of a full 128-bit vector so
  // vbpermq (which operates on v16i8) can be used.
  if (IsV8i8) {
    Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i8,
                      DAG.getUNDEF(MVT::v16i8), Src,
                      DAG.getIntPtrConstant(0, dl));
  }
  // Default index 128 selects a zero bit; valid entries are overwritten
  // below with the MSB position of each source element.
  SmallVector<int, 16> BitIndices(16, 128);
  unsigned NumElts = SrcVT.getVectorNumElements();
  unsigned EltSize = SrcVT.getScalarType().getSizeInBits();
  for (int Idx = 0, End = SrcVT.getVectorNumElements(); Idx < End; Idx++) {
    // MSB of element Idx, counting bit positions from the far end.
    BitIndices[Idx] = EltSize * (NumElts - Idx) - 1;
    // On LE, the 64-bit payload sits in the high half after the widening
    // insert, so shift the bit positions by 64.
    if (IsV8i8 && IsLE)
      BitIndices[Idx] += 64;
  }
  if (!IsLE)
    std::reverse(BitIndices.begin(), BitIndices.end());
  for (auto Idx : BitIndices)
    BVOps.push_back(DAG.getConstant(Idx, dl, MVT::i8));
  SDValue VRB = DAG.getBuildVector(MVT::v16i8, dl, BVOps);
  return DAG.getNode(
      ISD::INTRINSIC_WO_CHAIN, dl, MVT::v16i8,
      DAG.getConstant(Intrinsic::ppc_altivec_vbpermq, dl, MVT::i32),
      DAG.getBitcast(MVT::v16i8, Src), VRB);
}
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static bool isConstantOrUndef(const SDValue Op)
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
std::tuple< uint32_t, uint8_t > LXVKQPattern
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS)
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static cl::opt< unsigned > PPCMinimumBitTestCmps("ppc-min-bit-test-cmps", cl::init(3), cl::Hidden, cl::desc("Set minimum of largest number of comparisons to use bit test for " "switch on PPC."))
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, SelectionDAG &DAG)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static std::optional< LXVKQPattern > getPatternInfo(const APInt &FullVal)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG)
Optimize the bitfloor(X) pattern for PowerPC.
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static bool canConvertSETCCToXori(SDNode *N)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N, const SDLoc &DL)
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5890
bool isDenormal() const
Definition APFloat.h:1517
APInt bitcastToAPInt() const
Definition APFloat.h:1408
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
APInt abs() const
Get the absolute value.
Definition APInt.h:1818
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1419
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1745
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:215
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:362
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:775
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
arg_iterator arg_begin()
Definition Function.h:868
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
const Argument * const_arg_iterator
Definition Function.h:74
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:659
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:200
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:141
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
Tracks which library functions to use for a particular subtarget.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1080
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void setMinimumBitTestCmps(unsigned Val)
Set the minimum of largest of number of comparisons to generate BitTest.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
bool getFunctionSections() const
Return true if functions should be emitted into their own section, corresponding to -ffunction-sectio...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:184
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:328
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:275
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ TargetConstantPool
Definition ISDOpcodes.h:189
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SET_ROUNDING
Set rounding mode.
Definition ISDOpcodes.h:975
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ TargetExternalSymbol
Definition ISDOpcodes.h:190
@ BR
Control flow instructions. These all have token chains.
@ TargetJumpTable
Definition ISDOpcodes.h:188
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:185
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:970
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ INLINEASM
INLINEASM - Represents an inline asm block.
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:162
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:186
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:154
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:148
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:196
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:199
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:174
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:205
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:156
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:123
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:152
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:202
@ MO_TPREL_HA
Definition PPC.h:181
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:115
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition PPC.h:190
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:142
@ MO_TPREL_LO
Definition PPC.h:180
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:177
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:168
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:193
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:137
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:162
@ MO_HA
Definition PPC.h:178
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:119
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
Define some predicates that are used for node matching.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering)
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Invariant opcodes: All instruction sets have these as their low opcodes.
@ XMC_PR
Program Code.
Definition XCOFF.h:106
@ XTY_ER
External reference.
Definition XCOFF.h:242
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
iterator end() const
Definition BasicBlock.h:89
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
unsigned M1(unsigned Val)
Definition VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr RegState getDefRegState(bool B)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
DWARFExpression::Operation Op
bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
constexpr unsigned BitWidth
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
constexpr bool isShiftedUInt(uint64_t x)
Checks if a unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:198
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
LLVM_ABI std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:150
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:58
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.