//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPC.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <optional>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(false));
109
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"),
    cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
    cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32(
    "disable-ppc-innermost-loop-align32",
    cl::desc("don't always align innermost loop to 32 bytes on ppc"),
    cl::Hidden);

static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
    cl::desc("use absolute jump tables on ppc"), cl::Hidden);

static cl::opt<bool>
    DisablePerfectShuffle("ppc-disable-perfect-shuffle",
                          cl::desc("disable vector permute decomposition"),
                          cl::init(true), cl::Hidden);

134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(true), cl::Hidden);
137
139 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
143 "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
144 cl::desc("Set minimum of largest number of comparisons to use bit test for "
145 "switch on PPC."));
146
148 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
149 cl::desc("max depth when checking alias info in GatherAllAliases()"));
150
152 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
153 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
154 "function to use initial-exec"));
155
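// All of the options above are hidden developer knobs rather than stable,
// user-facing flags. As a sketch (hypothetical invocation), they can be
// toggled from any tool that parses LLVM's internal command-line options,
// e.g.:
//   llc -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco input.ll
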
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM,
          "Number of shuffles lowered to a VPERM or XXPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocations probed");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

                        unsigned OpIdx, bool IsByte,
                        const PPCInstrInfo *TII);

// A faster local-[exec|dynamic] TLS access sequence (enabled with the
// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
// variables; consistent with the IBM XL compiler, we apply a max size of
// slightly under 32KB.
constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;

// FIXME: Remove this once the bug has been fixed!
static cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", cl::Hidden,
                                 cl::desc("expose the ANDI glue bug on PPC"));

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM, STI), Subtarget(STI) {
  // Initialize the map that relates the PPC addressing modes to the computed
  // flags of a load/store instruction. The map is used to determine the
  // optimal addressing mode when selecting loads and stores.
  initializeAddrModeMap();
  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
  const MVT RegVT = Subtarget.getScalarIntVT();

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
      // EFPU2 APU only supports f32.
      if (!Subtarget.hasEFPU2())
        addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // PowerPC uses addo_carry and subo_carry to propagate carry.

  // On P10, the default lowering generates better code using the
  // setbc instruction.
  if (!Subtarget.hasP10Vector()) {
    if (isPPC64) {
    }
  }

  // Match BITREVERSE to the customized fast code sequence in the td file.

  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.

  // Custom lower inline assembly to check for special registers.

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
  }

  setTruncStoreAction(MVT::f128, MVT::f16, Expand);

  if (Subtarget.isISA3_0()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
    setTruncStoreAction(MVT::f64, MVT::f16, Legal);
    setTruncStoreAction(MVT::f32, MVT::f16, Legal);
  } else {
    // No extending loads from f16 or HW conversions back and forth.
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  if (!Subtarget.hasSPE()) {
  }

  if (Subtarget.useCRBits()) {

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);

      setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
      AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
      setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
      AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
    } else {
    }

    // PowerPC does not support direct load/store of condition registers.

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)

    for (MVT VT : MVT::integer_valuetypes()) {
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, LibCall);

  // PowerPC has no SREM/UREM instructions unless we are on P9. On P9 we may
  // use a hardware instruction to compute the remainder. When the result of
  // both the remainder and the division is required, it is more efficient to
  // compute the remainder from the result of the division rather than to use
  // the remainder instruction. The instructions are legalized directly
  // because the DivRemPairsPass performs the transformation at the IR level.
  if (Subtarget.isISA3_0()) {
  } else {
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.

  // Handle constrained floating-point operations of scalar.
  // TODO: Handle SPE specific operation.

  if (!Subtarget.hasSPE()) {
  }

  if (Subtarget.hasVSX()) {
  }

  if (Subtarget.hasFSQRT()) {
  }

  if (Subtarget.hasFPRND()) {
  }

  // We don't support sin/cos/sqrt/fmod/pow.

  // MASS transformation for LLVM intrinsics with replicating fast-math flags,
  // to be consistent with the PPCGenScalarMASSEntries pass.
  if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
  }

  if (Subtarget.hasSPE()) {
  } else {
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
  }

  if (Subtarget.hasSPE())
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
  } else {
  }

  if (Subtarget.hasFPRND()) {
  }

  // Prior to P10, PowerPC does not have BSWAP, but we can use the vector
  // BSWAP instruction xxbrd to speed up scalar BSWAP64.
  if (Subtarget.isISA3_1()) {
  } else {
    setOperationAction(ISD::BSWAP, MVT::i64,
                       (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
  }

  // CTPOP and CTTZ were introduced in P8 and P9 respectively.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTTZ, MVT::i32, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
  } else {
  }

512
513 // PowerPC does not have ROTR
516
517 if (!Subtarget.useCRBits()) {
518 // PowerPC does not have Select
523 }
524
525 // PowerPC wants to turn select_cc of FP into fsel when possible.
528
529 // PowerPC wants to optimize integer setcc a bit
530 if (!Subtarget.useCRBits())
532
533 if (Subtarget.hasFPU()) {
537
541 }
542
543 // PowerPC does not have BRCOND which requires SetCC
544 if (!Subtarget.useCRBits())
546
  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions.

    // SPE supports signaling compare of f32/f64.
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.

    // PowerPC does not have [U|S]INT_TO_FP.
  }

  if (Subtarget.hasDirectMove() && isPPC64) {
  } else {
  }

  // We cannot sextinreg(i1). Expand to shifts.

  // Custom handling for the PowerPC ucmp instruction.
  setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP are NOT intended to support SjLj exception
  // handling here; they are a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.

  if (Subtarget.is64BitELFABI()) {
    // VAARG always uses double-word chunks, so promote anything smaller.
    setOperationAction(ISD::VAARG, MVT::i1, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i8, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i16, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i32, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
  } else if (Subtarget.is32BitELFABI()) {
    // VAARG is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
  if (Subtarget.is32BitELFABI())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  if (Subtarget.isISA3_0() && isPPC64) {
    setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
    setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
    setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
    setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
    setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
    setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
    setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
    setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
  }

  // We want to custom lower some of our intrinsics.

  // To handle counter-based loop conditions.

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
  }

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.

    if (Subtarget.hasLFIWAX() || isPPC64) {
    }
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE()) {
    } else {
    }
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
    }

  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    // 64-bit PowerPC wants to expand i128 shifts itself.
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
  }

  // PowerPC has better expansions for funnel shifts than the generic
  // TargetLowering::expandFunnelShift.
  if (Subtarget.has64BitSupport()) {
  }

  if (Subtarget.hasVSX()) {
  }

  if (Subtarget.hasAltivec()) {
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
    }
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.

      // For v2i64, these are only valid with P8Vector. This is corrected
      // after the loop.
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
      } else {
      }

      if (Subtarget.hasVSX()) {
      }

      // Vector instructions introduced in P8.
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
      } else {
      }

      // Vector instructions introduced in P9.
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
      else

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType(ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType(ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType(ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType(ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType(ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType(ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType(ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.

      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    // We can custom-expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.

    // Vector truncates to sub-word integers that fit in an Altivec/VSX
    // register are cheap, so handle them before they get expanded to scalar.

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);

    // Custom lower ROTL of v1i128 to VECTOR_SHUFFLE of v16i8.
    setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
    // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
    if (Subtarget.hasAltivec())
      for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
        setOperationAction(ISD::ROTL, VT, Legal);
    // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::ROTL, MVT::v2i64, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    if (Subtarget.isISA3_1()) {
      setOperationAction(ISD::MUL, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
      setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
      setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
      setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
      setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
      setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
      setOperationAction(ISD::UREM, MVT::v2i64, Legal);
      setOperationAction(ISD::SREM, MVT::v2i64, Legal);
      setOperationAction(ISD::UREM, MVT::v4i32, Legal);
      setOperationAction(ISD::SREM, MVT::v4i32, Legal);
      setOperationAction(ISD::UREM, MVT::v1i128, Legal);
      setOperationAction(ISD::SREM, MVT::v1i128, Legal);
      setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
      setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
      setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
    }

    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    // LE is P8+/64-bit, so direct moves are supported and these operations
    // are legal. The custom transformation requires 64-bit since we need a
    // pair of stores that will cover a 128-bit load for P10.
    if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
    }

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      if (Subtarget.hasP8Vector()) {
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
      }

      // The nearbyint variants are not allowed to raise the inexact
      // exception, so we can only code-gen them with fpexcept.ignore.

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        // 128-bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct-move costs, it's not
        // worth doing here.
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      if (Subtarget.isISA3_1())
        setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
      else
        setOperationAction(ISD::SETCC, MVT::v1i128, Expand);

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v2f64);

      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);

      // Handle constrained floating-point operations of vector.
      // The predicate is hasVSX because Altivec instructions do not raise
      // floating-point exceptions but VSX vector instructions do.

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
      addRegisterClass(MVT::f128, &PPC::VRRCRegClass);

      for (MVT FPT : MVT::fp_valuetypes())
        setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);

      // Expand the SELECT to SELECT_CC.

      setTruncStoreAction(MVT::f128, MVT::f64, Expand);
      setTruncStoreAction(MVT::f128, MVT::f32, Expand);

      // No implementation for these ops on PowerPC.
      setOperationAction(ISD::FSIN, MVT::f128, Expand);
      setOperationAction(ISD::FCOS, MVT::f128, Expand);
      setOperationAction(ISD::FPOW, MVT::f128, Expand);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {

      // Test data class instructions store results in CR bits.
      if (Subtarget.useCRBits()) {
      }

      // 128-bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.
      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRA, MVT::v1i128, Expand);

      setOperationAction(ISD::FADD, MVT::f128, Legal);
      setOperationAction(ISD::FSUB, MVT::f128, Legal);
      setOperationAction(ISD::FDIV, MVT::f128, Legal);
      setOperationAction(ISD::FMUL, MVT::f128, Legal);

      setOperationAction(ISD::FMA, MVT::f128, Legal);

      setOperationAction(ISD::FRINT, MVT::f128, Legal);
      setOperationAction(ISD::FCEIL, MVT::f128, Legal);

      // Handle constrained floating-point operations of fp128.

      setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
      setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
      setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
      setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
    } else if (Subtarget.hasVSX()) {
      setOperationAction(ISD::LOAD, MVT::f128, Promote);
      setOperationAction(ISD::STORE, MVT::f128, Promote);

      AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
      AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);

      // Set FADD/FSUB as libcall to keep the legalizer from expanding them
      // via fp_to_uint and int_to_fp.

      setOperationAction(ISD::FMUL, MVT::f128, Expand);
      setOperationAction(ISD::FDIV, MVT::f128, Expand);
      setOperationAction(ISD::FNEG, MVT::f128, Expand);
      setOperationAction(ISD::FABS, MVT::f128, Expand);
      setOperationAction(ISD::FMA, MVT::f128, Expand);

      // Expand the fp_extend if the target type is fp128.

      // Expand the fp_round if the source type is fp128.
      for (MVT VT : {MVT::f32, MVT::f64}) {
      }

      // Lower the following f128 select_cc pattern:
      //   select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE

      // We need to handle f128 SELECT_CC with integer result type.
      setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
    }

    if (Subtarget.hasP9Altivec()) {
      if (Subtarget.isISA3_1()) {
      } else {
      }

      setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
      setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
      setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
      setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
    }

    if (Subtarget.hasP10Vector()) {
    }
  }

  if (Subtarget.pairedVectorMemops()) {
    addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
    setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
    setOperationAction(ISD::STORE, MVT::v256i1, Custom);
  }
  if (Subtarget.hasMMA()) {
    if (Subtarget.isISAFuture()) {
      addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
      addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
      addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
      setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
      setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
      setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
      setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
    } else {
      addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
    }
    setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
    setOperationAction(ISD::STORE, MVT::v512i1, Custom);
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  if (Subtarget.isISA3_1())
    setOperationAction(ISD::SRA, MVT::v1i128, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
  }

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
  }

  if (shouldInlineQuadwordAtomics())
    setMaxAtomicSizeInBitsSupported(128);
  else if (isPPC64)
    setMaxAtomicSizeInBitsSupported(64);
  else
    setMaxAtomicSizeInBitsSupported(32);

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific DAG combine patterns for the following nodes:
  if (Subtarget.hasFPCVT())
  if (Subtarget.useCRBits())

  if (Subtarget.useCRBits()) {
  }

  if (Subtarget.hasP8Vector())

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
  }

  // TODO: The default entry number is set to 64. This stops most jump table
  // generation on PPC. But it is good for current PPC hardware because an
  // indirect branch via mtctr to the jump table may lead to poor branch
  // prediction. Re-evaluate this value on future hardware that can do better
  // with mtctr.
  setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);

  // The default minimum of the largest number in a BitTest cluster is 3.

  setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);

  auto CPUDirective = Subtarget.getCPUDirective();
  switch (CPUDirective) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
  case PPC::DIR_PWR11:

    break;
  }

  if (Subtarget.enableMachineScheduler())
  else

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemcpy = 32;
  } else if (CPUDirective == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
  }

  // Enable generation of STXVP instructions by default for mcpu=future.
  if (CPUDirective == PPC::DIR_PWR_FUTURE &&
      DisableAutoPairedVecSt.getNumOccurrences() == 0)
    DisableAutoPairedVecSt = false;

  IsStrictFPEnabled = true;

  // Let the subtarget (CPU) decide if a predictable select is more expensive
  // than the corresponding branch. This information is used in CGP to decide
  // when to convert selects into branches.
  PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
}

// *********************************** NOTE ************************************
// For selecting load and store instructions, the addressing modes are defined
// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
// patterns to match the load and store instructions.
//
// The TD definitions for the addressing modes correspond to their respective
// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
// address mode flags of a particular node. Afterwards, the computed address
// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
// accordingly, based on the preferred addressing mode.
//
// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
// MemOpFlags contains all the possible flags that can be used to compute the
// optimal addressing mode for load and store instructions.
// AddrMode contains all the possible load and store addressing modes available
// on Power (such as DForm, DSForm, DQForm, XForm, etc.).
//
// When adding new load and store instructions, it is possible that new address
// flags may need to be added into MemOpFlags, and a new addressing mode will
// need to be added to AddrMode. An entry for the new addressing mode
// (consisting of the minimal and main distinguishing address flags for the new
// load/store instructions) will need to be added into initializeAddrModeMap()
// below. Finally, when adding new addressing modes, getAddrModeForFlags() will
// need to be updated to account for selecting the optimal addressing mode.
// *****************************************************************************
/// Initialize the map that relates the different addressing modes of the load
/// and store instructions to a set of flags. This ensures the load/store
/// instruction is correctly matched during instruction selection.
void PPCTargetLowering::initializeAddrModeMap() {
  AddrModesMap[PPC::AM_DForm] = {
      // LWZ, STW
      // LBZ, LHZ, STB, STH
      // LHA
      // LFS, LFD, STFS, STFD
  };
  AddrModesMap[PPC::AM_DSForm] = {
      // LWA
      // LD, STD
      // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
  };
  AddrModesMap[PPC::AM_DQForm] = {
      // LXV, STXV
  };
  AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
                                       PPC::MOF_SubtargetP10};
  // TODO: Add mapping for quadword load/store.
}
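
// As an illustration of the flow in the NOTE above: for a plain word load
// that selects to LWZ, computeMOFlags() computes the memory-op flags for the
// LOAD node, getAddrModeForFlags() finds those flags in the
// AddrModesMap[PPC::AM_DForm] entry, and SelectOptimalAddrMode() then sets
// Base and Displacement for a D-Form (base register + 16-bit signed
// displacement) access.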

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 &&
        VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
      MaxAlign = Align(32);
    else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
             MaxAlign < 16)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                               const DataLayout &DL) const {
  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary
  // on PPC32.
  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
  if (Subtarget.hasAltivec())
    getMaxByValAlign(Ty, Alignment, Align(16));
  return Alignment;
}
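
// For example, with Altivec enabled a by-value struct that contains a
// 128-bit vector member is bumped to a 16-byte alignment by the walk above,
// while an aggregate of plain integers keeps the default 8-byte (PPC64) or
// 4-byte (PPC32) boundary.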

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}

    Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
  if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
    return false;

  if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
    if (VTy->getScalarType()->isIntegerTy()) {
      // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
      if (ElemSizeInBits == 32) {
        Index = Subtarget.isLittleEndian() ? 2 : 1;
        return true;
      }
      if (ElemSizeInBits == 64) {
        Index = Subtarget.isLittleEndian() ? 1 : 0;
        return true;
      }
    }
  }
  return false;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                   EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}
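
// For example, with two different inputs on a big-endian target
// (ShuffleKind 0), the mask must pick the odd bytes of both inputs:
//   <1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>
// i.e. the low byte of each big-endian halfword, which is what vpkuhum
// produces.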

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}
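
// For example, with two different inputs on a big-endian target
// (ShuffleKind 0), the mask must pick the low halfword of each word:
//   <2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31>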

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}
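
// For example, with two different inputs on a big-endian target
// (ShuffleKind 0), the mask must pick the low word of each doubleword:
//   <4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31>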

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}
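
// For example, a big-endian merge-low of word elements with two different
// inputs (UnitSize 4, ShuffleKind 0) checks isVMerge(N, 4, 8, 24) and thus
// requires the mask
//   <8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31>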

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}
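
// Likewise, a big-endian merge-high of word elements with two different
// inputs (UnitSize 4, ShuffleKind 0) checks isVMerge(N, 4, 0, 16) and thus
// requires the mask
//   <0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23>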

/**
 * Common function used to match vmrgew and vmrgow shuffles.
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * ("Targeting your applications - what little endian and big endian IBM XL
 * C/C++ compiler differences mean to you").
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements
 * are numbered in array-access order, starting with the first vector. These
 * vectors are always of type v16i8, thus each vector will contain 16 elements
 * of size 8 bits. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of
 *     the indices will be 0 to 15. In this case, the RHSStart value passed
 *     should be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices
 *     16 to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand
 *            input vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}

/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped
 *     for little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for vmrgew/vmrgow
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  } else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  return false;
}
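
// For example, a big-endian even-word merge with two different inputs
// (ShuffleKind 0, CheckEven true) uses indexOffset 0 and RHSStartValue 16,
// which requires the mask
//   <0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27>
// i.e. words 0 and 2 of the first input interleaved with words 0 and 2 of
// the second.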

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
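
// For example, on a big-endian target with two different inputs
// (ShuffleKind 0), the mask
//   <3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18>
// is matched and yields a shift amount of 3.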

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  EVT VT = N->getValueType(0);
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);

  assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements. So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte
  // element splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    // An UNDEF element is a sequence of UNDEF bytes.
    if (N->getMaskElt(i) < 0) {
      for (unsigned j = 1; j != EltSize; ++j)
        if (N->getMaskElt(i + j) >= 0)
          return false;
    } else
      for (unsigned j = 0; j != EltSize; ++j)
        if (N->getMaskElt(i + j) != N->getMaskElt(j))
          return false;
  }
  return true;
}
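
// For example, with EltSize == 4 the mask
//   <4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7>
// is accepted as a splat of the 4-byte element at index 1.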

/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
///            Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta indices number among the N byte element, if
///            the mask is in increasing/decreasing order then it is 1/-1.
/// \return true iff the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; // Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    MaskVal[0] = N->getMaskElt(i * Width);
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
      return false;
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(i * Width + j);
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
  }

  return true;
}
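
// For example, with Width == 4 and StepLen == 1 the mask
//   <8, 9, 10, 11, 0, 1, 2, 3, 20, 21, 22, 23, 4, 5, 6, 7>
// passes (each word's bytes are increasing, consecutive, and word-aligned),
// while with StepLen == -1 each word's bytes must decrease, as in
//   <3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12>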

bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0, 4, 8, 12.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
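
// For example, on a big-endian target the word-level mask <4, 1, 2, 3>
// (byte mask <16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15>)
// matches the "H, 1, 2, 3" pattern above and yields ShiftElts = 3,
// InsertAtByte = 0, Swap = false.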

bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0, 4, 8, 12, which are the beginnings of
  // words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else { // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}
2281
2282 static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2283 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2284
2285 if (!isNByteElemShuffleMask(N, Width, -1))
2286 return false;
2287
2288 for (int i = 0; i < 16; i += Width)
2289 if (N->getMaskElt(i) != i + Width - 1)
2290 return false;
2291
2292 return true;
2293}
2294
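// The wrappers below use the helper above to match the byte-reversal masks of
// the xxbrh/xxbrw/xxbrd/xxbrq instructions; e.g. Width == 2 accepts only the
// mask {1,0, 3,2, ..., 15,14}, the bytes of each halfword swapped.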
2295 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2296 return isXXBRShuffleMaskHelper(N, 2);
2297 }
2298
2299 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2300 return isXXBRShuffleMaskHelper(N, 4);
2301 }
2302
2303 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2304 return isXXBRShuffleMaskHelper(N, 8);
2305 }
2306
2307 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2308 return isXXBRShuffleMaskHelper(N, 16);
2309 }
2310
2311/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2312/// if the inputs to the instruction should be swapped and set \p DM to the
2313/// value for the immediate.
2314/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2315/// AND element 0 of the result comes from the first input (LE) or second input
2316/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2317/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2318/// mask.
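/// For example, on LE the doubleword-level mask {3, 0} takes element 0 of the
/// result from doubleword 1 of the second input and element 1 from doubleword
/// 0 of the first, so Swap = false and DM = (((~0) & 1) << 1) + ((~3) & 1) = 2.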
2319 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2320 bool &Swap, bool IsLE) {
2321 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2322
2323 // Ensure each byte index of the double word is consecutive.
2324 if (!isNByteElemShuffleMask(N, 8, 1))
2325 return false;
2326
2327 unsigned M0 = N->getMaskElt(0) / 8;
2328 unsigned M1 = N->getMaskElt(8) / 8;
2329 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2330
2331 // If both vector operands for the shuffle are the same vector, the mask will
2332 // contain only elements from the first one and the second one will be undef.
2333 if (N->getOperand(1).isUndef()) {
2334 if ((M0 | M1) < 2) {
2335 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2336 Swap = false;
2337 return true;
2338 } else
2339 return false;
2340 }
2341
2342 if (IsLE) {
2343 if (M0 > 1 && M1 < 2) {
2344 Swap = false;
2345 } else if (M0 < 2 && M1 > 1) {
2346 M0 = (M0 + 2) % 4;
2347 M1 = (M1 + 2) % 4;
2348 Swap = true;
2349 } else
2350 return false;
2351
2352 // Note: if control flow comes here that means Swap is already set above
2353 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2354 return true;
2355 } else { // BE
2356 if (M0 < 2 && M1 > 1) {
2357 Swap = false;
2358 } else if (M0 > 1 && M1 < 2) {
2359 M0 = (M0 + 2) % 4;
2360 M1 = (M1 + 2) % 4;
2361 Swap = true;
2362 } else
2363 return false;
2364
2365 // Note: if control flow comes here that means Swap is already set above
2366 DM = (M0 << 1) + (M1 & 1);
2367 return true;
2368 }
2369}
2370
2371
2372/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2373/// appropriate for PPC mnemonics (which have a big endian bias - namely
2374/// elements are counted from the left of the vector register).
2375unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2376 SelectionDAG &DAG) {
2377 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2378 assert(isSplatShuffleMask(SVOp, EltSize));
2379 EVT VT = SVOp->getValueType(0);
2380
2381 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2382 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2383 : SVOp->getMaskElt(0);
2384
2385 if (DAG.getDataLayout().isLittleEndian())
2386 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2387 else
2388 return SVOp->getMaskElt(0) / EltSize;
2389}
2390
2391/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2392/// by using a vspltis[bhw] instruction of the specified element size, return
2393/// the constant being splatted. The ByteSize field indicates the number of
2394/// bytes of each element [124] -> [bhw].
2395 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2396 SDValue OpVal;
2397
2398 // If ByteSize of the splat is bigger than the element size of the
2399 // build_vector, then we have a case where we are checking for a splat where
2400 // multiple elements of the buildvector are folded together into a single
2401 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2402 unsigned EltSize = 16/N->getNumOperands();
2403 if (EltSize < ByteSize) {
2404 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2405 SDValue UniquedVals[4];
2406 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2407
2408 // See if all of the elements in the buildvector agree across.
2409 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2410 if (N->getOperand(i).isUndef()) continue;
2411 // If the element isn't a constant, bail fully out.
2412 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2413
2414 if (!UniquedVals[i&(Multiple-1)].getNode())
2415 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2416 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2417 return SDValue(); // no match.
2418 }
2419
2420 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2421 // either constant or undef values that are identical for each chunk. See
2422 // if these chunks can form into a larger vspltis*.
2423
2424 // Check to see if all of the leading entries are either 0 or -1. If
2425 // neither, then this won't fit into the immediate field.
2426 bool LeadingZero = true;
2427 bool LeadingOnes = true;
2428 for (unsigned i = 0; i != Multiple-1; ++i) {
2429 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2430
2431 LeadingZero &= isNullConstant(UniquedVals[i]);
2432 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2433 }
2434 // Finally, check the least significant entry.
2435 if (LeadingZero) {
2436 if (!UniquedVals[Multiple-1].getNode())
2437 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2438 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2439 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2440 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2441 }
2442 if (LeadingOnes) {
2443 if (!UniquedVals[Multiple-1].getNode())
2444 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2445 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2446 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2447 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2448 }
2449
2450 return SDValue();
2451 }
2452
2453 // Check to see if this buildvec has a single non-undef value in its elements.
2454 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2455 if (N->getOperand(i).isUndef()) continue;
2456 if (!OpVal.getNode())
2457 OpVal = N->getOperand(i);
2458 else if (OpVal != N->getOperand(i))
2459 return SDValue();
2460 }
2461
2462 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2463
2464 unsigned ValSizeInBytes = EltSize;
2465 uint64_t Value = 0;
2466 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2467 Value = CN->getZExtValue();
2468 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2469 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2470 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2471 }
2472
2473 // If the splat value is larger than the element value, then we can never do
2474 // this splat. The only case that we could fit the replicated bits into our
2475 // immediate field for would be zero, and we prefer to use vxor for it.
2476 if (ValSizeInBytes < ByteSize) return SDValue();
2477
2478 // If the element value is larger than the splat value, check if it consists
2479 // of a repeated bit pattern of size ByteSize.
2480 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2481 return SDValue();
2482
2483 // Properly sign extend the value.
2484 int MaskVal = SignExtend32(Value, ByteSize * 8);
2485
2486 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2487 if (MaskVal == 0) return SDValue();
2488
2489 // Finally, if this value fits in a 5 bit sext field, return it
2490 if (SignExtend32<5>(MaskVal) == MaskVal)
2491 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2492 return SDValue();
2493}
2494
2495//===----------------------------------------------------------------------===//
2496// Addressing Mode Selection
2497//===----------------------------------------------------------------------===//
2498
2499/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2500/// or 64-bit immediate, and if the value can be accurately represented as a
2501/// sign extension from a 16-bit value. If so, this returns true and the
2502/// immediate.
2503bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2504 if (!isa<ConstantSDNode>(N))
2505 return false;
2506
2507 Imm = (int16_t)N->getAsZExtVal();
2508 if (N->getValueType(0) == MVT::i32)
2509 return Imm == (int32_t)N->getAsZExtVal();
2510 else
2511 return Imm == (int64_t)N->getAsZExtVal();
2512}
2513 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2514 return isIntS16Immediate(Op.getNode(), Imm);
2515}
2516
2517/// Used when computing address flags for selecting loads and stores.
2518/// If we have an OR, check if the LHS and RHS are provably disjoint.
2519/// An OR of two provably disjoint values is equivalent to an ADD.
2520/// Most PPC load/store instructions compute the effective address as a sum,
2521/// so doing this conversion is useful.
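/// For example, in (or (shl X, 2), 3) the low two bits of the left operand are
/// known zero and all other bits of the constant 3 are zero, so every bit is
/// known zero on at least one side and the OR behaves as an ADD.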
2522static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2523 if (N.getOpcode() != ISD::OR)
2524 return false;
2525 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2526 if (!LHSKnown.Zero.getBoolValue())
2527 return false;
2528 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2529 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2530}
2531
2532/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2533/// be represented as an indexed [r+r] operation.
2534 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2535 SDValue &Index,
2536 SelectionDAG &DAG) const {
2537 for (SDNode *U : N->users()) {
2538 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2539 if (Memop->getMemoryVT() == MVT::f64) {
2540 Base = N.getOperand(0);
2541 Index = N.getOperand(1);
2542 return true;
2543 }
2544 }
2545 }
2546 return false;
2547}
2548
2549 /// isIntS34Immediate - This method tests whether the given node's value can be
2550/// accurately represented as a sign extension from a 34-bit value. If so,
2551/// this returns true and the immediate.
2552bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2553 if (!isa<ConstantSDNode>(N))
2554 return false;
2555
2556 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2557 return isInt<34>(Imm);
2558}
2559 bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2560 return isIntS34Immediate(Op.getNode(), Imm);
2561}
2562
2563 /// SelectAddressRegReg - Given the specified address, check to see if it
2564/// can be represented as an indexed [r+r] operation. Returns false if it
2565/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2566/// non-zero and N can be represented by a base register plus a signed 16-bit
2567/// displacement, make a more precise judgement by checking (displacement % \p
2568/// EncodingAlignment).
2569 bool PPCTargetLowering::SelectAddressRegReg(
2570 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2571 MaybeAlign EncodingAlignment) const {
2572 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2573 // a [pc+imm].
2574 if (SelectAddressPCRel(N, Base))
2575 return false;
2576
2577 int16_t Imm = 0;
2578 if (N.getOpcode() == ISD::ADD) {
2579 // Is there an SPE (f64) load/store, which can't handle a 16-bit offset?
2580 // SPE load/store can only handle 8-bit offsets.
2581 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2582 return true;
2583 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2584 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2585 return false; // r+i
2586 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2587 return false; // r+i
2588
2589 Base = N.getOperand(0);
2590 Index = N.getOperand(1);
2591 return true;
2592 } else if (N.getOpcode() == ISD::OR) {
2593 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2594 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2595 return false; // r+i can fold it if we can.
2596
2597 // If this is an or of disjoint bitfields, we can codegen this as an add
2598 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2599 // disjoint.
2600 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2601
2602 if (LHSKnown.Zero.getBoolValue()) {
2603 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2604 // If all of the bits are known zero on the LHS or RHS, the add won't
2605 // carry.
2606 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2607 Base = N.getOperand(0);
2608 Index = N.getOperand(1);
2609 return true;
2610 }
2611 }
2612 }
2613
2614 return false;
2615}
2616
2617// If we happen to be doing an i64 load or store into a stack slot that has
2618// less than a 4-byte alignment, then the frame-index elimination may need to
2619// use an indexed load or store instruction (because the offset may not be a
2620// multiple of 4). The extra register needed to hold the offset comes from the
2621// register scavenger, and it is possible that the scavenger will need to use
2622// an emergency spill slot. As a result, we need to make sure that a spill slot
2623// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2624// stack slot.
2625static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2626 // FIXME: This does not handle the LWA case.
2627 if (VT != MVT::i64)
2628 return;
2629
2630 // NOTE: We'll exclude negative FIs here, which come from argument
2631 // lowering, because there are no known test cases triggering this problem
2632 // using packed structures (or similar). We can remove this exclusion if
2633 // we find such a test case. The reason why this is so test-case driven is
2634 // because this entire 'fixup' is only to prevent crashes (from the
2635 // register scavenger) on not-really-valid inputs. For example, if we have:
2636 // %a = alloca i1
2637 // %b = bitcast i1* %a to i64*
2638 // store i64 %c, i64* %b
2639 // then the store should really be marked as 'align 1', but is not. If it
2640 // were marked as 'align 1' then the indexed form would have been
2641 // instruction-selected initially, and the problem this 'fixup' is preventing
2642 // won't happen regardless.
2643 if (FrameIdx < 0)
2644 return;
2645
2646 MachineFunction &MF = DAG.getMachineFunction();
2647 MachineFrameInfo &MFI = MF.getFrameInfo();
2648
2649 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2650 return;
2651
2652 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2653 FuncInfo->setHasNonRISpills();
2654}
2655
2656/// Returns true if the address N can be represented by a base register plus
2657/// a signed 16-bit displacement [r+imm], and if it is not better
2658/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2659/// displacements that are multiples of that value.
2660 bool PPCTargetLowering::SelectAddressRegImm(
2661 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2662 MaybeAlign EncodingAlignment) const {
2663 // FIXME dl should come from parent load or store, not from address
2664 SDLoc dl(N);
2665
2666 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2667 // a [pc+imm].
2668 if (SelectAddressPCRel(N, Base))
2669 return false;
2670
2671 // If this can be more profitably realized as r+r, fail.
2672 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2673 return false;
2674
2675 if (N.getOpcode() == ISD::ADD) {
2676 int16_t imm = 0;
2677 if (isIntS16Immediate(N.getOperand(1), imm) &&
2678 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2679 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2680 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2681 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2682 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2683 } else {
2684 Base = N.getOperand(0);
2685 }
2686 return true; // [r+i]
2687 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2688 // Match LOAD (ADD (X, Lo(G))).
2689 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2690 "Cannot handle constant offsets yet!");
2691 Disp = N.getOperand(1).getOperand(0); // The global address.
2692 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2693 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2694 Disp.getOpcode() == ISD::TargetConstantPool ||
2695 Disp.getOpcode() == ISD::TargetJumpTable);
2696 Base = N.getOperand(0);
2697 return true; // [&g+r]
2698 }
2699 } else if (N.getOpcode() == ISD::OR) {
2700 int16_t imm = 0;
2701 if (isIntS16Immediate(N.getOperand(1), imm) &&
2702 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2703 // If this is an or of disjoint bitfields, we can codegen this as an add
2704 // (for better address arithmetic) if the LHS and RHS of the OR are
2705 // provably disjoint.
2706 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2707
2708 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2709 // If all of the bits are known zero on the LHS or RHS, the add won't
2710 // carry.
2711 if (FrameIndexSDNode *FI =
2712 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2713 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2714 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2715 } else {
2716 Base = N.getOperand(0);
2717 }
2718 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2719 return true;
2720 }
2721 }
2722 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2723 // Loading from a constant address.
2724
2725 // If this address fits entirely in a 16-bit sext immediate field, codegen
2726 // this as "d, 0"
2727 int16_t Imm;
2728 if (isIntS16Immediate(CN, Imm) &&
2729 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2730 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2731 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2732 CN->getValueType(0));
2733 return true;
2734 }
2735
2736 // Handle 32-bit sext immediates with LIS + addr mode.
2737 if ((CN->getValueType(0) == MVT::i32 ||
2738 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2739 (!EncodingAlignment ||
2740 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2741 int Addr = (int)CN->getZExtValue();
2742
2743 // Otherwise, break this down into an LIS + disp.
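// For example, Addr = 0x12348000 gives Disp = (short)0x8000 = -32768 and
// Base = (0x12348000 - (-32768)) >> 16 = 0x1235; LIS then materializes
// 0x12350000, and adding the displacement -32768 reconstructs 0x12348000.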
2744 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2745
2746 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2747 MVT::i32);
2748 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2749 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2750 return true;
2751 }
2752 }
2753
2754 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2755 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2756 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2757 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2758 } else
2759 Base = N;
2760 return true; // [r+0]
2761}
2762
2763/// Similar to the 16-bit case but for instructions that take a 34-bit
2764/// displacement field (prefixed loads/stores).
2765 bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2766 SDValue &Base,
2767 SelectionDAG &DAG) const {
2768 // Only on 64-bit targets.
2769 if (N.getValueType() != MVT::i64)
2770 return false;
2771
2772 SDLoc dl(N);
2773 int64_t Imm = 0;
2774
2775 if (N.getOpcode() == ISD::ADD) {
2776 if (!isIntS34Immediate(N.getOperand(1), Imm))
2777 return false;
2778 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2779 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2780 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2781 else
2782 Base = N.getOperand(0);
2783 return true;
2784 }
2785
2786 if (N.getOpcode() == ISD::OR) {
2787 if (!isIntS34Immediate(N.getOperand(1), Imm))
2788 return false;
2789 // If this is an or of disjoint bitfields, we can codegen this as an add
2790 // (for better address arithmetic) if the LHS and RHS of the OR are
2791 // provably disjoint.
2792 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2793 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2794 return false;
2795 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2796 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2797 else
2798 Base = N.getOperand(0);
2799 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2800 return true;
2801 }
2802
2803 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2804 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2805 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2806 return true;
2807 }
2808
2809 return false;
2810}
2811
2812 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2813/// represented as an indexed [r+r] operation.
2814 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2815 SDValue &Index,
2816 SelectionDAG &DAG) const {
2817 // Check to see if we can easily represent this as an [r+r] address. This
2818 // will fail if it thinks that the address is more profitably represented as
2819 // reg+imm, e.g. where imm = 0.
2820 if (SelectAddressRegReg(N, Base, Index, DAG))
2821 return true;
2822
2823 // If the address is the result of an add, we will utilize the fact that the
2824 // address calculation includes an implicit add. However, we can reduce
2825 // register pressure if we do not materialize a constant just for use as the
2826 // index register. We only get rid of the add if it is not an add of a
2827 // value and a 16-bit signed constant and both have a single use.
2828 int16_t imm = 0;
2829 if (N.getOpcode() == ISD::ADD &&
2830 (!isIntS16Immediate(N.getOperand(1), imm) ||
2831 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2832 Base = N.getOperand(0);
2833 Index = N.getOperand(1);
2834 return true;
2835 }
2836
2837 // Otherwise, do it the hard way, using R0 as the base register.
2838 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2839 N.getValueType());
2840 Index = N;
2841 return true;
2842}
2843
2844template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2845 Ty *PCRelCand = dyn_cast<Ty>(N);
2846 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2847}
2848
2849/// Returns true if this address is a PC Relative address.
2850/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2851/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2852 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2853 // This is a materialize PC Relative node. Always select this as PC Relative.
2854 Base = N;
2855 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2856 return true;
2857 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2858 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2859 isValidPCRelNode<JumpTableSDNode>(N) ||
2860 isValidPCRelNode<BlockAddressSDNode>(N))
2861 return true;
2862 return false;
2863}
2864
2865/// Returns true if we should use a direct load into vector instruction
2866/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2867static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2868
2869 // If there are any uses other than scalar-to-vector, then we should
2870 // keep it as a scalar load -> direct move pattern to prevent multiple
2871 // loads.
2872 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2873 if (!LD)
2874 return false;
2875
2876 EVT MemVT = LD->getMemoryVT();
2877 if (!MemVT.isSimple())
2878 return false;
2879 switch(MemVT.getSimpleVT().SimpleTy) {
2880 case MVT::i64:
2881 break;
2882 case MVT::i32:
2883 if (!ST.hasP8Vector())
2884 return false;
2885 break;
2886 case MVT::i16:
2887 case MVT::i8:
2888 if (!ST.hasP9Vector())
2889 return false;
2890 break;
2891 default:
2892 return false;
2893 }
2894
2895 SDValue LoadedVal(N, 0);
2896 if (!LoadedVal.hasOneUse())
2897 return false;
2898
2899 for (SDUse &Use : LD->uses())
2900 if (Use.getResNo() == 0 &&
2901 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2902 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2903 return false;
2904
2905 return true;
2906}
2907
2908 /// getPreIndexedAddressParts - Returns true, and sets the base pointer,
2909 /// offset pointer, and addressing mode by reference, if this node's address
2910 /// can be legally represented as a pre-indexed load / store address.
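/// For example, a load whose address is (add R, 16) can be selected as
/// "lwzu r3, 16(R)", which loads from R+16 and also updates R to that address.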
2911 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2912 SDValue &Offset,
2913 ISD::MemIndexedMode &AM,
2914 SelectionDAG &DAG) const {
2915 if (DisablePPCPreinc) return false;
2916
2917 bool isLoad = true;
2918 SDValue Ptr;
2919 EVT VT;
2920 Align Alignment;
2921 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2922 Ptr = LD->getBasePtr();
2923 VT = LD->getMemoryVT();
2924 Alignment = LD->getAlign();
2925 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2926 Ptr = ST->getBasePtr();
2927 VT = ST->getMemoryVT();
2928 Alignment = ST->getAlign();
2929 isLoad = false;
2930 } else
2931 return false;
2932
2933 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2934 // instructions because we can fold these into a more efficient instruction
2935 // instead (such as LXSD).
2936 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2937 return false;
2938 }
2939
2940 // PowerPC doesn't have preinc load/store instructions for vectors
2941 if (VT.isVector())
2942 return false;
2943
2944 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2945 // Common code will reject creating a pre-inc form if the base pointer
2946 // is a frame index, or if N is a store and the base pointer is either
2947 // the same as or a predecessor of the value being stored. Check for
2948 // those situations here, and try with swapped Base/Offset instead.
2949 bool Swap = false;
2950
2951 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2952 Swap = true;
2953 else if (!isLoad) {
2954 SDValue Val = cast<StoreSDNode>(N)->getValue();
2955 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2956 Swap = true;
2957 }
2958
2959 if (Swap)
2960 std::swap(Base, Offset);
2961
2962 AM = ISD::PRE_INC;
2963 return true;
2964 }
2965
2966 // LDU/STU can only handle immediates that are a multiple of 4.
2967 if (VT != MVT::i64) {
2968 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
2969 return false;
2970 } else {
2971 // LDU/STU need an address with at least 4-byte alignment.
2972 if (Alignment < Align(4))
2973 return false;
2974
2975 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2976 return false;
2977 }
2978
2979 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2980 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2981 // sext i32 to i64 when addr mode is r+i.
2982 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2983 LD->getExtensionType() == ISD::SEXTLOAD &&
2984 isa<ConstantSDNode>(Offset))
2985 return false;
2986 }
2987
2988 AM = ISD::PRE_INC;
2989 return true;
2990}
2991
2992//===----------------------------------------------------------------------===//
2993// LowerOperation implementation
2994//===----------------------------------------------------------------------===//
2995
2996/// Return true if we should reference labels using a PICBase, set the HiOpFlags
2997/// and LoOpFlags to the target MO flags.
2998static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2999 unsigned &HiOpFlags, unsigned &LoOpFlags,
3000 const GlobalValue *GV = nullptr) {
3001 HiOpFlags = PPCII::MO_HA;
3002 LoOpFlags = PPCII::MO_LO;
3003
3004 // Don't use the pic base if not in PIC relocation model.
3005 if (IsPIC) {
3006 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3007 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3008 }
3009}
3010
3011static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3012 SelectionDAG &DAG) {
3013 SDLoc DL(HiPart);
3014 EVT PtrVT = HiPart.getValueType();
3015 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3016
3017 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3018 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3019
3020 // With PIC, the first instruction is actually "GR+hi(&G)".
3021 if (isPIC)
3022 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3023 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3024
3025 // Generate non-pic code that has direct accesses to the constant pool.
3026 // The address of the global is just (hi(&g)+lo(&g)).
3027 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3028}
3029
3030 static void setUsesTOCBasePtr(MachineFunction &MF) {
3031 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3032 FuncInfo->setUsesTOCBasePtr();
3033}
3034
3035 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3036 setUsesTOCBasePtr(DAG.getMachineFunction());
3037 }
3038
3039SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3040 SDValue GA) const {
3041 EVT VT = Subtarget.getScalarIntVT();
3042 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3043 : Subtarget.isAIXABI()
3044 ? DAG.getRegister(PPC::R2, VT)
3045 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3046 SDValue Ops[] = { GA, Reg };
3047 return DAG.getMemIntrinsicNode(
3048 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3051}
3052
3053SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3054 SelectionDAG &DAG) const {
3055 EVT PtrVT = Op.getValueType();
3056 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3057 const Constant *C = CP->getConstVal();
3058
3059 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3060 // The actual address of the GlobalValue is stored in the TOC.
3061 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3062 if (Subtarget.isUsingPCRelativeCalls()) {
3063 SDLoc DL(CP);
3064 EVT Ty = getPointerTy(DAG.getDataLayout());
3065 SDValue ConstPool = DAG.getTargetConstantPool(
3066 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3067 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3068 }
3069 setUsesTOCBasePtr(DAG);
3070 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3071 return getTOCEntry(DAG, SDLoc(CP), GA);
3072 }
3073
3074 unsigned MOHiFlag, MOLoFlag;
3075 bool IsPIC = isPositionIndependent();
3076 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3077
3078 if (IsPIC && Subtarget.isSVR4ABI()) {
3079 SDValue GA =
3080 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, PPCII::MO_PIC_FLAG);
3081 return getTOCEntry(DAG, SDLoc(CP), GA);
3082 }
3083
3084 SDValue CPIHi =
3085 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3086 SDValue CPILo =
3087 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3088 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3089}
3090
3091// For 64-bit PowerPC, prefer the more compact relative encodings.
3092// This trades 32 bits per jump table entry for one or two instructions
3093// on the jump site.
3094 unsigned PPCTargetLowering::getJumpTableEncoding() const {
3095 if (isJumpTableRelative())
3096 return MachineJumpTableInfo::EK_LabelDifference32;
3097
3098 return TargetLowering::getJumpTableEncoding();
3099 }
3100
3101 bool PPCTargetLowering::isJumpTableRelative() const {
3102 if (UseAbsoluteJumpTables)
3103 return false;
3104 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3105 return true;
3107}
3108
3109 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3110 SelectionDAG &DAG) const {
3111 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3112 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3113
3114 switch (getTargetMachine().getCodeModel()) {
3115 case CodeModel::Small:
3116 case CodeModel::Medium:
3117 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3118 default:
3119 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3120 getPointerTy(DAG.getDataLayout()));
3121 }
3122}
3123
3124const MCExpr *
3125 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3126 unsigned JTI,
3127 MCContext &Ctx) const {
3128 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3129 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3130
3131 switch (getTargetMachine().getCodeModel()) {
3132 case CodeModel::Small:
3133 case CodeModel::Medium:
3134 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3135 default:
3136 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3137 }
3138}
3139
3140SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3141 EVT PtrVT = Op.getValueType();
3142 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3143
3144 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3145 if (Subtarget.isUsingPCRelativeCalls()) {
3146 SDLoc DL(JT);
3147 EVT Ty = getPointerTy(DAG.getDataLayout());
3148 SDValue GA =
3149 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3150 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3151 return MatAddr;
3152 }
3153
3154 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3155 // The actual address of the GlobalValue is stored in the TOC.
3156 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3157 setUsesTOCBasePtr(DAG);
3158 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3159 return getTOCEntry(DAG, SDLoc(JT), GA);
3160 }
3161
3162 unsigned MOHiFlag, MOLoFlag;
3163 bool IsPIC = isPositionIndependent();
3164 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3165
3166 if (IsPIC && Subtarget.isSVR4ABI()) {
3167 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3168 PPCII::MO_PIC_FLAG);
3169 return getTOCEntry(DAG, SDLoc(GA), GA);
3170 }
3171
3172 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3173 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3174 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3175}
3176
3177SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3178 SelectionDAG &DAG) const {
3179 EVT PtrVT = Op.getValueType();
3180 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3181 const BlockAddress *BA = BASDN->getBlockAddress();
3182
3183 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3184 if (Subtarget.isUsingPCRelativeCalls()) {
3185 SDLoc DL(BASDN);
3186 EVT Ty = getPointerTy(DAG.getDataLayout());
3187 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3188 PPCII::MO_PCREL_FLAG);
3189 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3190 return MatAddr;
3191 }
3192
3193 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3194 // The actual BlockAddress is stored in the TOC.
3195 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3196 setUsesTOCBasePtr(DAG);
3197 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3198 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3199 }
3200
3201 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3202 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3203 return getTOCEntry(
3204 DAG, SDLoc(BASDN),
3205 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3206
3207 unsigned MOHiFlag, MOLoFlag;
3208 bool IsPIC = isPositionIndependent();
3209 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3210 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3211 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3212 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3213}
3214
3215SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3216 SelectionDAG &DAG) const {
3217 if (Subtarget.isAIXABI())
3218 return LowerGlobalTLSAddressAIX(Op, DAG);
3219
3220 return LowerGlobalTLSAddressLinux(Op, DAG);
3221}
3222
3223/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3224/// and then apply the update.
3225 static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3226 SelectionDAG &DAG,
3227 const TargetMachine &TM) {
3228 // Initialize TLS model opt setting lazily:
3229 // (1) Use initial-exec for single TLS var references within current function.
3230 // (2) Use local-dynamic for multiple TLS var references within current
3231 // function.
3232 PPCFunctionInfo *FuncInfo =
3233 DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3234 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3235 SmallPtrSet<const GlobalValue *, 8> TLSGV;
3236 // Iterate over all instructions within current function, collect all TLS
3237 // global variables (global variables taken as the first parameter to
3238 // Intrinsic::threadlocal_address).
3239 const Function &Func = DAG.getMachineFunction().getFunction();
3240 for (const BasicBlock &BB : Func)
3241 for (const Instruction &I : BB)
3242 if (I.getOpcode() == Instruction::Call)
3243 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3244 if (Function *CF = CI->getCalledFunction())
3245 if (CF->isDeclaration() &&
3246 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3247 if (const GlobalValue *GV =
3248 dyn_cast<GlobalValue>(I.getOperand(0))) {
3249 TLSModel::Model GVModel = TM.getTLSModel(GV);
3250 if (GVModel == TLSModel::LocalDynamic)
3251 TLSGV.insert(GV);
3252 }
3253
3254 unsigned TLSGVCnt = TLSGV.size();
3255 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3256 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3257 FuncInfo->setAIXFuncUseTLSIEForLD();
3258 FuncInfo->setAIXFuncTLSModelOptInitDone();
3259 }
3260
3261 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3262 LLVM_DEBUG(
3263 dbgs() << DAG.getMachineFunction().getName()
3264 << " function is using the TLS-IE model for TLS-LD access.\n");
3265 Model = TLSModel::InitialExec;
3266 }
3267}
3268
3269SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3270 SelectionDAG &DAG) const {
3271 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3272
3273 if (DAG.getTarget().useEmulatedTLS())
3274 report_fatal_error("Emulated TLS is not yet supported on AIX");
3275
3276 SDLoc dl(GA);
3277 const GlobalValue *GV = GA->getGlobal();
3278 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3279 bool Is64Bit = Subtarget.isPPC64();
3280 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3281
3282 // Apply update to the TLS model.
3283 if (Subtarget.hasAIXShLibTLSModelOpt())
3284 updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3285
3286 // TLS variables are accessed through TOC entries.
3287 // To support this, set the DAG to use the TOC base pointer.
3288 setUsesTOCBasePtr(DAG);
3289
3290 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3291
3292 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3293 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3294 bool HasAIXSmallTLSGlobalAttr = false;
3295 SDValue VariableOffsetTGA =
3296 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3297 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3298 SDValue TLSReg;
3299
3300 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3301 if (GVar->hasAttribute("aix-small-tls"))
3302 HasAIXSmallTLSGlobalAttr = true;
3303
3304 if (Is64Bit) {
3305 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3306 // involves a load of the variable offset (from the TOC), followed by an
3307 // add of the loaded variable offset to R13 (the thread pointer).
3308 // This code sequence looks like:
3309 // ld reg1,var[TC](2)
3310 // add reg2, reg1, r13 // r13 contains the thread pointer
3311 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3312
3313 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3314 // global variable attribute, produce a faster access sequence for
3315 // local-exec TLS variables where the offset from the TLS base is encoded
3316 // as an immediate operand.
3317 //
3318 // We only utilize the faster local-exec access sequence when the TLS
3319 // variable has a size within the policy limit. We treat types that are
3320 // not sized or are empty as being over the policy size limit.
3321 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3322 IsTLSLocalExecModel) {
3323 Type *GVType = GV->getValueType();
3324 if (GVType->isSized() && !GVType->isEmptyTy() &&
3325 GV->getDataLayout().getTypeAllocSize(GVType) <=
3326 AIXSmallTlsPolicySizeLimit)
3327 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3328 }
3329 } else {
3330 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3331 // involves loading the variable offset from the TOC, generating a call to
3332 // .__get_tpointer to get the thread pointer (which will be in R3), and
3333 // adding the two together:
3334 // lwz reg1,var[TC](2)
3335 // bla .__get_tpointer
3336 // add reg2, reg1, r3
3337 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3338
3339 // We do not implement the 32-bit version of the faster access sequence
3340 // for local-exec that is controlled by the -maix-small-local-exec-tls
3341 // option, or the "aix-small-tls" global variable attribute.
3342 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3343 report_fatal_error("The small-local-exec TLS access sequence is "
3344 "currently only supported on AIX (64-bit mode).");
3345 }
3346 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3347 }
3348
3349 if (Model == TLSModel::LocalDynamic) {
3350 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3351
3352 // We do not implement the 32-bit version of the faster access sequence
3353 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3354 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3355 report_fatal_error("The small-local-dynamic TLS access sequence is "
3356 "currently only supported on AIX (64-bit mode).");
3357
3358 // For local-dynamic on AIX, we need to generate one TOC entry for each
3359 // variable offset, and a single module-handle TOC entry for the entire
3360 // file.
3361
3362 SDValue VariableOffsetTGA =
3363 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3364 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3365
3366 Module *M = DAG.getMachineFunction().getFunction().getParent();
3367 GlobalVariable *TLSGV =
3368 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3369 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3370 TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
3371 assert(TLSGV && "Not able to create GV for _$TLSML.");
3372 SDValue ModuleHandleTGA =
3373 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3374 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3375 SDValue ModuleHandle =
3376 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3377
3378 // With the -maix-small-local-dynamic-tls option, produce a faster access
3379 // sequence for local-dynamic TLS variables where the offset from the
3380 // module-handle is encoded as an immediate operand.
3381 //
3382 // We only utilize the faster local-dynamic access sequence when the TLS
3383 // variable has a size within the policy limit. We treat types that are
3384 // not sized or are empty as being over the policy size limit.
3385 if (HasAIXSmallLocalDynamicTLS) {
3386 Type *GVType = GV->getValueType();
3387 if (GVType->isSized() && !GVType->isEmptyTy() &&
3388 GV->getDataLayout().getTypeAllocSize(GVType) <=
3389 AIXSmallTlsPolicySizeLimit)
3390 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3391 ModuleHandle);
3392 }
3393
3394 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3395 }
3396
3397 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3398 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3399 // need to generate two TOC entries, one for the variable offset, one for the
3400 // region handle. The global address for the TOC entry of the region handle is
3401 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3402 // entry of the variable offset is created with MO_TLSGD_FLAG.
3403 SDValue VariableOffsetTGA =
3404 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3405 SDValue RegionHandleTGA =
3406 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3407 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3408 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3409 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3410 RegionHandle);
3411}
3412
3413SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3414 SelectionDAG &DAG) const {
3415 // FIXME: TLS addresses currently use medium model code sequences,
3416 // which is the most useful form. Eventually support for small and
3417 // large models could be added if users need it, at the cost of
3418 // additional complexity.
3419 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3420 if (DAG.getTarget().useEmulatedTLS())
3421 return LowerToTLSEmulatedModel(GA, DAG);
3422
3423 SDLoc dl(GA);
3424 const GlobalValue *GV = GA->getGlobal();
3425 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3426 bool is64bit = Subtarget.isPPC64();
3427 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3428 PICLevel::Level picLevel = M->getPICLevel();
3429
3430 const TargetMachine &TM = getTargetMachine();
3431 TLSModel::Model Model = TM.getTLSModel(GV);
3432
3433 if (Model == TLSModel::LocalExec) {
3434 if (Subtarget.isUsingPCRelativeCalls()) {
3435 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3436 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3437 PPCII::MO_TPREL_PCREL_FLAG);
3438 SDValue MatAddr =
3439 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3440 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3441 }
3442
3443 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3444 PPCII::MO_TPREL_HA);
3445 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3446 PPCII::MO_TPREL_LO);
3447 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3448 : DAG.getRegister(PPC::R2, MVT::i32);
3449
3450 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3451 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3452 }
3453
3454 if (Model == TLSModel::InitialExec) {
3455 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3456 SDValue TGA = DAG.getTargetGlobalAddress(
3457 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3458 SDValue TGATLS = DAG.getTargetGlobalAddress(
3459 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3460 SDValue TPOffset;
3461 if (IsPCRel) {
3462 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3463 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3464 MachinePointerInfo());
3465 } else {
3466 SDValue GOTPtr;
3467 if (is64bit) {
3468 setUsesTOCBasePtr(DAG);
3469 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3470 GOTPtr =
3471 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3472 } else {
3473 if (!TM.isPositionIndependent())
3474 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3475 else if (picLevel == PICLevel::SmallPIC)
3476 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3477 else
3478 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3479 }
3480 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3481 }
3482 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3483 }
3484
3485 if (Model == TLSModel::GeneralDynamic) {
3486 if (Subtarget.isUsingPCRelativeCalls()) {
3487 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3488 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3489 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3490 }
3491
3492 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3493 SDValue GOTPtr;
3494 if (is64bit) {
3495 setUsesTOCBasePtr(DAG);
3496 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3497 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3498 GOTReg, TGA);
3499 } else {
3500 if (picLevel == PICLevel::SmallPIC)
3501 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3502 else
3503 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3504 }
3505 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3506 GOTPtr, TGA, TGA);
3507 }
3508
3509 if (Model == TLSModel::LocalDynamic) {
3510 if (Subtarget.isUsingPCRelativeCalls()) {
3511 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3512 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3513 SDValue MatPCRel =
3514 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3515 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3516 }
3517
3518 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3519 SDValue GOTPtr;
3520 if (is64bit) {
3521 setUsesTOCBasePtr(DAG);
3522 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3523 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3524 GOTReg, TGA);
3525 } else {
3526 if (picLevel == PICLevel::SmallPIC)
3527 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3528 else
3529 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3530 }
3531 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3532 PtrVT, GOTPtr, TGA, TGA);
3533 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3534 PtrVT, TLSAddr, TGA);
3535 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3536 }
3537
3538 llvm_unreachable("Unknown TLS model!");
3539}
3540
3541SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3542 SelectionDAG &DAG) const {
3543 EVT PtrVT = Op.getValueType();
3544 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3545 SDLoc DL(GSDN);
3546 const GlobalValue *GV = GSDN->getGlobal();
3547
3548 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3549 // The actual address of the GlobalValue is stored in the TOC.
3550 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3551 if (Subtarget.isUsingPCRelativeCalls()) {
3552 EVT Ty = getPointerTy(DAG.getDataLayout());
3553 if (isAccessedAsGotIndirect(Op)) {
3554 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3555 PPCII::MO_GOT_PCREL_FLAG);
3556 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3557 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3558 MachinePointerInfo());
3559 return Load;
3560 } else {
3561 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3562 PPCII::MO_PCREL_FLAG);
3563 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3564 }
3565 }
3566 setUsesTOCBasePtr(DAG);
3567 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3568 return getTOCEntry(DAG, DL, GA);
3569 }
3570
3571 unsigned MOHiFlag, MOLoFlag;
3572 bool IsPIC = isPositionIndependent();
3573 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3574
3575 if (IsPIC && Subtarget.isSVR4ABI()) {
3576 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3577 GSDN->getOffset(),
3578 PPCII::MO_PIC_FLAG);
3579 return getTOCEntry(DAG, DL, GA);
3580 }
3581
3582 SDValue GAHi =
3583 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3584 SDValue GALo =
3585 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3586
3587 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3588}
3589
3590SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3591 bool IsStrict = Op->isStrictFPOpcode();
3592 ISD::CondCode CC =
3593 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3594 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3595 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3596 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3597 EVT LHSVT = LHS.getValueType();
3598 SDLoc dl(Op);
3599
3600 // Soften the setcc with libcall if it is fp128.
3601 if (LHSVT == MVT::f128) {
3602 assert(!Subtarget.hasP9Vector() &&
3603 "SETCC for f128 is already legal under Power9!");
3604 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3605 Op->getOpcode() == ISD::STRICT_FSETCCS);
3606 if (RHS.getNode())
3607 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3608 DAG.getCondCode(CC));
3609 if (IsStrict)
3610 return DAG.getMergeValues({LHS, Chain}, dl);
3611 return LHS;
3612 }
3613
3614 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3615
3616 if (Op.getValueType() == MVT::v2i64) {
3617 // When the operands themselves are v2i64 values, we need to do something
3618 // special because VSX has no underlying comparison operations for these.
3619 if (LHS.getValueType() == MVT::v2i64) {
3620 // Equality can be handled by casting to the legal type for Altivec
3621 // comparisons, everything else needs to be expanded.
3622 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3623 return SDValue();
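// A doubleword compares equal only if both of its words do, so AND each
// word-compare result with its partner word (the {1,0,3,2} shuffle below);
// for SETNE, one unequal word suffices, hence the OR.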
3624 SDValue SetCC32 = DAG.getSetCC(
3625 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3626 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3627 int ShuffV[] = {1, 0, 3, 2};
3628 SDValue Shuff =
3629 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3630 return DAG.getBitcast(MVT::v2i64,
3631 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3632 dl, MVT::v4i32, Shuff, SetCC32));
3633 }
3634
3635 // We handle most of these in the usual way.
3636 return Op;
3637 }
3638
3639 // If we're comparing for equality to zero, expose the fact that this is
3640 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3641 // fold the new nodes.
3642 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3643 return V;
3644
3645 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3646 // Leave comparisons against 0 and -1 alone for now, since they're usually
3647 // optimized. FIXME: revisit this when we can custom lower all setcc
3648 // optimizations.
3649 if (C->isAllOnes() || C->isZero())
3650 return SDValue();
3651 }
3652
3653 // If we have an integer seteq/setne, turn it into a compare against zero
3654 // by xor'ing the rhs with the lhs, which is faster than setting a
3655 // condition register, reading it back out, and masking the correct bit. The
3656 // normal approach here uses sub to do this instead of xor. Using xor exposes
3657 // the result to other bit-twiddling opportunities.
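// For example, (seteq a, b) becomes (seteq (xor a, b), 0) below.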
3658 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3659 EVT VT = Op.getValueType();
3660 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3661 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3662 }
3663 return SDValue();
3664}
3665
3666SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3667 SDNode *Node = Op.getNode();
3668 EVT VT = Node->getValueType(0);
3669 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3670 SDValue InChain = Node->getOperand(0);
3671 SDValue VAListPtr = Node->getOperand(1);
3672 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3673 SDLoc dl(Node);
3674
3675 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3676
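// The PPC32 SVR4 va_list layout assumed by the offsets below:
//   struct __va_list_tag {
//     unsigned char gpr;        // offset 0: next GPR index
//     unsigned char fpr;        // offset 1: next FPR index
//     // 2 bytes of padding
//     char *overflow_arg_area;  // offset 4
//     char *reg_save_area;      // offset 8
//   };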
3677 // gpr_index
3678 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3679 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3680 InChain = GprIndex.getValue(1);
3681
3682 if (VT == MVT::i64) {
3683 // Check if GprIndex is even
3684 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3685 DAG.getConstant(1, dl, MVT::i32));
3686 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3687 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3688 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3689 DAG.getConstant(1, dl, MVT::i32));
3690 // Align GprIndex to be even if it isn't
3691 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3692 GprIndex);
3693 }
3694
3695 // fpr index is 1 byte after gpr
3696 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3697 DAG.getConstant(1, dl, MVT::i32));
3698
3699 // fpr
3700 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3701 FprPtr, MachinePointerInfo(SV), MVT::i8);
3702 InChain = FprIndex.getValue(1);
3703
3704 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3705 DAG.getConstant(8, dl, MVT::i32));
3706
3707 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3708 DAG.getConstant(4, dl, MVT::i32));
3709
3710 // areas
3711 SDValue OverflowArea =
3712 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3713 InChain = OverflowArea.getValue(1);
3714
3715 SDValue RegSaveArea =
3716 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3717 InChain = RegSaveArea.getValue(1);
3718
3719 // select overflow_area if index >= 8
3720 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3721 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3722
3723 // adjustment constant gpr_index * 4/8
3724 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3725 VT.isInteger() ? GprIndex : FprIndex,
3726 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3727 MVT::i32));
3728
3729 // OurReg = RegSaveArea + RegConstant
3730 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3731 RegConstant);
3732
3733 // Floating types are 32 bytes into RegSaveArea
3734 if (VT.isFloatingPoint())
3735 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3736 DAG.getConstant(32, dl, MVT::i32));
3737
3738 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3739 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3740 VT.isInteger() ? GprIndex : FprIndex,
3741 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3742 MVT::i32));
3743
3744 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3745 VT.isInteger() ? VAListPtr : FprPtr,
3746 MachinePointerInfo(SV), MVT::i8);
3747
3748 // determine if we should load from reg_save_area or overflow_area
3749 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3750
3751 // increase overflow_area by 4/8 if gpr/fpr >= 8
3752 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3753 DAG.getConstant(VT.isInteger() ? 4 : 8,
3754 dl, MVT::i32));
3755
3756 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3757 OverflowAreaPlusN);
3758
3759 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3760 MachinePointerInfo(), MVT::i32);
3761
3762 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3763}
3764
3765SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3766 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3767
3768 // We have to copy the entire va_list struct:
3769 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
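// Spelled out by byte offset (editor's illustration):
// 0: gpr count, 1: fpr count, 2..3: padding,
// 4: overflow_arg_area pointer, 8: reg_save_area pointer.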
3770 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3771 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3772 false, true, /*CI=*/nullptr, std::nullopt,
3773 MachinePointerInfo(), MachinePointerInfo());
3774}
3775
3776SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3777 SelectionDAG &DAG) const {
3778 return Op.getOperand(0);
3779}
3780
3781SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3782 MachineFunction &MF = DAG.getMachineFunction();
3783 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3784
3785 assert((Op.getOpcode() == ISD::INLINEASM ||
3786 Op.getOpcode() == ISD::INLINEASM_BR) &&
3787 "Expecting Inline ASM node.");
3788
3789 // If an LR store is already known to be required then there is no point in
3790 // checking this ASM as well.
3791 if (MFI.isLRStoreRequired())
3792 return Op;
3793
3794 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3795 // type MVT::Glue. We want to ignore this last operand if that is the case.
3796 unsigned NumOps = Op.getNumOperands();
3797 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3798 --NumOps;
3799
3800 // Check all operands that may contain the LR.
3801 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3802 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3803 unsigned NumVals = Flags.getNumOperandRegisters();
3804 ++i; // Skip the ID value.
3805
3806 switch (Flags.getKind()) {
3807 default:
3808 llvm_unreachable("Bad flags!");
3809 case InlineAsm::Kind::RegUse:
3810 case InlineAsm::Kind::Imm:
3811 case InlineAsm::Kind::Mem:
3812 i += NumVals;
3813 break;
3814 case InlineAsm::Kind::Clobber:
3815 case InlineAsm::Kind::RegDef:
3816 case InlineAsm::Kind::RegDefEarlyClobber: {
3817 for (; NumVals; --NumVals, ++i) {
3818 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3819 if (Reg != PPC::LR && Reg != PPC::LR8)
3820 continue;
3821 MFI.setLRStoreRequired();
3822 return Op;
3823 }
3824 break;
3825 }
3826 }
3827 }
3828
3829 return Op;
3830}
3831
3832SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3833 SelectionDAG &DAG) const {
3834 SDValue Chain = Op.getOperand(0);
3835 SDValue Trmp = Op.getOperand(1); // trampoline
3836 SDValue FPtr = Op.getOperand(2); // nested function
3837 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3838 SDLoc dl(Op);
3839
3840 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3841
3842 if (Subtarget.isAIXABI()) {
3843 // On AIX we create a trampoline descriptor by combining the
3844 // entry point and TOC from the global descriptor (FPtr) with the
3845 // nest argument as the environment pointer.
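// Editor's sketch (64-bit AIX, PointerSize == 8): the function descriptor
// and the resulting trampoline buffer are both laid out as
// [0] entry point address, [8] TOC pointer, [16] environment pointer
// (for the trampoline, the environment slot holds the 'nest' argument).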
3846 uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
3847 MaybeAlign PointerAlign(PointerSize);
3848 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
3849 ? (MachineMemOperand::MODereferenceable |
3850 MachineMemOperand::MOInvariant)
3851 : MachineMemOperand::MONone;
3852
3853 uint64_t TOCPointerOffset = 1 * PointerSize;
3854 uint64_t EnvPointerOffset = 2 * PointerSize;
3855 SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
3856 SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
3857
3858 const Value *TrampolineAddr =
3859 cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3860 const Function *Func =
3861 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
3862
3863 SDValue OutChains[3];
3864
3865 // Copy the entry point address from the global descriptor to the
3866 // trampoline buffer.
3867 SDValue LoadEntryPoint =
3868 DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
3869 PointerAlign, MMOFlags);
3870 SDValue EPLoadChain = LoadEntryPoint.getValue(1);
3871 OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
3872 MachinePointerInfo(TrampolineAddr, 0));
3873
3874 // Copy the TOC pointer from the global descriptor to the trampoline
3875 // buffer.
3876 SDValue TOCFromDescriptorPtr =
3877 DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
3878 SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
3879 MachinePointerInfo(Func, TOCPointerOffset),
3880 PointerAlign, MMOFlags);
3881 SDValue TrampolineTOCPointer =
3882 DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
3883 SDValue TOCLoadChain = TOCReg.getValue(1);
3884 OutChains[1] =
3885 DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
3886 MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
3887
3888 // Store the nest argument into the environment pointer in the trampoline
3889 // buffer.
3890 SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
3891 OutChains[2] =
3892 DAG.getStore(Chain, dl, Nest, EnvPointer,
3893 MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
3894
3895 SDValue TokenFactor =
3896 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
3897 return TokenFactor;
3898 }
3899
3900 bool isPPC64 = (PtrVT == MVT::i64);
3901 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3902
3903 TargetLowering::ArgListTy Args;
3904 Args.emplace_back(Trmp, IntPtrTy);
3905 // TrampSize == (isPPC64 ? 48 : 40);
3906 Args.emplace_back(
3907 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
3908 IntPtrTy);
3909 Args.emplace_back(FPtr, IntPtrTy);
3910 Args.emplace_back(Nest, IntPtrTy);
3911
3912 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3913 TargetLowering::CallLoweringInfo CLI(DAG);
3914 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3915 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3916 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3917
3918 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3919 return CallResult.second;
3920}
3921
3922SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3923 MachineFunction &MF = DAG.getMachineFunction();
3924 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3925 EVT PtrVT = getPointerTy(MF.getDataLayout());
3926
3927 SDLoc dl(Op);
3928
3929 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3930 // vastart just stores the address of the VarArgsFrameIndex slot into the
3931 // memory location argument.
3932 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3933 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3934 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3935 MachinePointerInfo(SV));
3936 }
3937
3938 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3939 // We suppose the given va_list is already allocated.
3940 //
3941 // typedef struct {
3942 // char gpr; /* index into the array of 8 GPRs
3943 // * stored in the register save area
3944 // * gpr=0 corresponds to r3,
3945 // * gpr=1 to r4, etc.
3946 // */
3947 // char fpr; /* index into the array of 8 FPRs
3948 // * stored in the register save area
3949 // * fpr=0 corresponds to f1,
3950 // * fpr=1 to f2, etc.
3951 // */
3952 // char *overflow_arg_area;
3953 // /* location on stack that holds
3954 // * the next overflow argument
3955 // */
3956 // char *reg_save_area;
3957 // /* where r3:r10 and f1:f8 (if saved)
3958 // * are stored
3959 // */
3960 // } va_list[1];
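// In byte offsets from the va_list pointer (editor's note), the stores below
// write: gpr at 0, fpr at 1, overflow_arg_area at 4 and reg_save_area at 8;
// offsets 2..3 are the alignment padding from the struct layout above.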
3961
3962 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3963 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3964 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3965 PtrVT);
3966 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3967 PtrVT);
3968
3969 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3970 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3971
3972 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3973 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3974
3975 uint64_t FPROffset = 1;
3976 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3977
3978 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3979
3980 // Store first byte : number of int regs
3981 SDValue firstStore =
3982 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3983 MachinePointerInfo(SV), MVT::i8);
3984 uint64_t nextOffset = FPROffset;
3985 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3986 ConstFPROffset);
3987
3988 // Store second byte : number of float regs
3989 SDValue secondStore =
3990 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3991 MachinePointerInfo(SV, nextOffset), MVT::i8);
3992 nextOffset += StackOffset;
3993 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3994
3995 // Store second word : arguments given on stack
3996 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3997 MachinePointerInfo(SV, nextOffset));
3998 nextOffset += FrameOffset;
3999 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4000
4001 // Store third word : arguments given in registers
4002 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4003 MachinePointerInfo(SV, nextOffset));
4004}
4005
4006/// FPR - The set of FP registers that should be allocated for arguments
4007/// on Darwin and AIX.
4008static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4009 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4010 PPC::F11, PPC::F12, PPC::F13};
4011
4012/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4013/// the stack.
4014static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4015 unsigned PtrByteSize) {
4016 unsigned ArgSize = ArgVT.getStoreSize();
4017 if (Flags.isByVal())
4018 ArgSize = Flags.getByValSize();
4019
4020 // Round up to multiples of the pointer size, except for array members,
4021 // which are always packed.
4022 if (!Flags.isInConsecutiveRegs())
4023 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4024
4025 return ArgSize;
4026}
4027
4028/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4029/// on the stack.
4030 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4031 ISD::ArgFlagsTy Flags,
4032 unsigned PtrByteSize) {
4033 Align Alignment(PtrByteSize);
4034
4035 // Altivec parameters are padded to a 16 byte boundary.
4036 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4037 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4038 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4039 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4040 Alignment = Align(16);
4041
4042 // ByVal parameters are aligned as requested.
4043 if (Flags.isByVal()) {
4044 auto BVAlign = Flags.getNonZeroByValAlign();
4045 if (BVAlign > PtrByteSize) {
4046 if (BVAlign.value() % PtrByteSize != 0)
4048 "ByVal alignment is not a multiple of the pointer size");
4049
4050 Alignment = BVAlign;
4051 }
4052 }
4053
4054 // Array members are always packed to their original alignment.
4055 if (Flags.isInConsecutiveRegs()) {
4056 // If the array member was split into multiple registers, the first
4057 // needs to be aligned to the size of the full type. (Except for
4058 // ppcf128, which is only aligned as its f64 components.)
4059 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4060 Alignment = Align(OrigVT.getStoreSize());
4061 else
4062 Alignment = Align(ArgVT.getStoreSize());
4063 }
4064
4065 return Alignment;
4066}
4067
4068/// CalculateStackSlotUsed - Return whether this argument will use its
4069/// stack slot (instead of being passed in registers). ArgOffset,
4070/// AvailableFPRs, and AvailableVRs must hold the current argument
4071/// position, and will be updated to account for this argument.
4072static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4073 unsigned PtrByteSize, unsigned LinkageSize,
4074 unsigned ParamAreaSize, unsigned &ArgOffset,
4075 unsigned &AvailableFPRs,
4076 unsigned &AvailableVRs) {
4077 bool UseMemory = false;
4078
4079 // Respect alignment of argument on the stack.
4080 Align Alignment =
4081 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4082 ArgOffset = alignTo(ArgOffset, Alignment);
4083 // If there's no space left in the argument save area, we must
4084 // use memory (this check also catches zero-sized arguments).
4085 if (ArgOffset >= LinkageSize + ParamAreaSize)
4086 UseMemory = true;
4087
4088 // Allocate argument on the stack.
4089 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4090 if (Flags.isInConsecutiveRegsLast())
4091 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4092 // If we overran the argument save area, we must use memory
4093 // (this check catches arguments passed partially in memory)
4094 if (ArgOffset > LinkageSize + ParamAreaSize)
4095 UseMemory = true;
4096
4097 // However, if the argument is actually passed in an FPR or a VR,
4098 // we don't use memory after all.
4099 if (!Flags.isByVal()) {
4100 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4101 if (AvailableFPRs > 0) {
4102 --AvailableFPRs;
4103 return false;
4104 }
4105 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4106 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4107 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4108 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4109 if (AvailableVRs > 0) {
4110 --AvailableVRs;
4111 return false;
4112 }
4113 }
4114
4115 return UseMemory;
4116}
4117
4118/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4119/// ensure minimum alignment required for target.
4120 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4121 unsigned NumBytes) {
4122 return alignTo(NumBytes, Lowering->getStackAlign());
4123}
4124
4125SDValue PPCTargetLowering::LowerFormalArguments(
4126 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4127 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4128 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4129 if (Subtarget.isAIXABI())
4130 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4131 InVals);
4132 if (Subtarget.is64BitELFABI())
4133 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4134 InVals);
4135 assert(Subtarget.is32BitELFABI());
4136 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4137 InVals);
4138}
4139
4140SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4141 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4142 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4143 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4144
4145 // 32-bit SVR4 ABI Stack Frame Layout:
4146 // +-----------------------------------+
4147 // +--> | Back chain |
4148 // | +-----------------------------------+
4149 // | | Floating-point register save area |
4150 // | +-----------------------------------+
4151 // | | General register save area |
4152 // | +-----------------------------------+
4153 // | | CR save word |
4154 // | +-----------------------------------+
4155 // | | VRSAVE save word |
4156 // | +-----------------------------------+
4157 // | | Alignment padding |
4158 // | +-----------------------------------+
4159 // | | Vector register save area |
4160 // | +-----------------------------------+
4161 // | | Local variable space |
4162 // | +-----------------------------------+
4163 // | | Parameter list area |
4164 // | +-----------------------------------+
4165 // | | LR save word |
4166 // | +-----------------------------------+
4167 // SP--> +--- | Back chain |
4168 // +-----------------------------------+
4169 //
4170 // Specifications:
4171 // System V Application Binary Interface PowerPC Processor Supplement
4172 // AltiVec Technology Programming Interface Manual
4173
4174 MachineFunction &MF = DAG.getMachineFunction();
4175 MachineFrameInfo &MFI = MF.getFrameInfo();
4176 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4177
4178 EVT PtrVT = getPointerTy(MF.getDataLayout());
4179 // Potential tail calls could cause overwriting of argument stack slots.
4180 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4181 (CallConv == CallingConv::Fast));
4182 const Align PtrAlign(4);
4183
4184 // Assign locations to all of the incoming arguments.
4185 SmallVector<CCValAssign, 16> ArgLocs;
4186 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4187 *DAG.getContext());
4188
4189 // Reserve space for the linkage area on the stack.
4190 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4191 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4192 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4193
4194 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4195 CCValAssign &VA = ArgLocs[i];
4196
4197 // Arguments stored in registers.
4198 if (VA.isRegLoc()) {
4199 const TargetRegisterClass *RC;
4200 EVT ValVT = VA.getValVT();
4201
4202 switch (ValVT.getSimpleVT().SimpleTy) {
4203 default:
4204 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4205 case MVT::i1:
4206 case MVT::i32:
4207 RC = &PPC::GPRCRegClass;
4208 break;
4209 case MVT::f32:
4210 if (Subtarget.hasP8Vector())
4211 RC = &PPC::VSSRCRegClass;
4212 else if (Subtarget.hasSPE())
4213 RC = &PPC::GPRCRegClass;
4214 else
4215 RC = &PPC::F4RCRegClass;
4216 break;
4217 case MVT::f64:
4218 if (Subtarget.hasVSX())
4219 RC = &PPC::VSFRCRegClass;
4220 else if (Subtarget.hasSPE())
4221 // SPE passes doubles in GPR pairs.
4222 RC = &PPC::GPRCRegClass;
4223 else
4224 RC = &PPC::F8RCRegClass;
4225 break;
4226 case MVT::v16i8:
4227 case MVT::v8i16:
4228 case MVT::v4i32:
4229 RC = &PPC::VRRCRegClass;
4230 break;
4231 case MVT::v4f32:
4232 RC = &PPC::VRRCRegClass;
4233 break;
4234 case MVT::v2f64:
4235 case MVT::v2i64:
4236 RC = &PPC::VRRCRegClass;
4237 break;
4238 }
4239
4240 SDValue ArgValue;
4241 // Transform the arguments stored in physical registers into
4242 // virtual ones.
4243 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4244 assert(i + 1 < e && "No second half of double precision argument");
4245 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4246 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4247 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4248 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4249 if (!Subtarget.isLittleEndian())
4250 std::swap (ArgValueLo, ArgValueHi);
4251 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4252 ArgValueHi);
4253 } else {
4254 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4255 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4256 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4257 if (ValVT == MVT::i1)
4258 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4259 }
4260
4261 InVals.push_back(ArgValue);
4262 } else {
4263 // Argument stored in memory.
4264 assert(VA.isMemLoc());
4265
4266 // Get the extended size of the argument type in stack
4267 unsigned ArgSize = VA.getLocVT().getStoreSize();
4268 // Get the actual size of the argument type
4269 unsigned ObjSize = VA.getValVT().getStoreSize();
4270 unsigned ArgOffset = VA.getLocMemOffset();
4271 // Stack objects in PPC32 are right justified.
4272 ArgOffset += ArgSize - ObjSize;
4273 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4274
4275 // Create load nodes to retrieve arguments from the stack.
4276 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4277 InVals.push_back(
4278 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4279 }
4280 }
4281
4282 // Assign locations to all of the incoming aggregate by value arguments.
4283 // Aggregates passed by value are stored in the local variable space of the
4284 // caller's stack frame, right above the parameter list area.
4285 SmallVector<CCValAssign, 16> ByValArgLocs;
4286 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4287 ByValArgLocs, *DAG.getContext());
4288
4289 // Reserve stack space for the allocations in CCInfo.
4290 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4291
4292 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4293
4294 // Area that is at least reserved in the caller of this function.
4295 unsigned MinReservedArea = CCByValInfo.getStackSize();
4296 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4297
4298 // Set the size that is at least reserved in caller of this function. Tail
4299 // call optimized function's reserved stack space needs to be aligned so that
4300 // taking the difference between two stack areas will result in an aligned
4301 // stack.
4302 MinReservedArea =
4303 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4304 FuncInfo->setMinReservedArea(MinReservedArea);
4305
4306 SmallVector<SDValue, 8> MemOps;
4307
4308 // If the function takes a variable number of arguments, make a frame index for
4309 // the start of the first vararg value... for expansion of llvm.va_start.
4310 if (isVarArg) {
4311 static const MCPhysReg GPArgRegs[] = {
4312 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4313 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4314 };
4315 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4316
4317 static const MCPhysReg FPArgRegs[] = {
4318 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4319 PPC::F8
4320 };
4321 unsigned NumFPArgRegs = std::size(FPArgRegs);
4322
4323 if (useSoftFloat() || hasSPE())
4324 NumFPArgRegs = 0;
4325
4326 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4327 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4328
4329 // Make room for NumGPArgRegs and NumFPArgRegs.
4330 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4331 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4332
4333 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4334 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4335
4336 FuncInfo->setVarArgsFrameIndex(
4337 MFI.CreateStackObject(Depth, Align(8), false));
4338 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4339
4340 // The fixed integer arguments of a variadic function are stored to the
4341 // VarArgsFrameIndex on the stack so that they may be loaded by
4342 // dereferencing the result of va_next.
4343 for (MCPhysReg GPArgReg : GPArgRegs) {
4344 // Get an existing live-in vreg, or add a new one.
4345 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4346 if (!VReg)
4347 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4348
4349 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4350 SDValue Store =
4351 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4352 MemOps.push_back(Store);
4353 // Increment the address by four for the next argument to store
4354 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4355 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4356 }
4357
4358 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4359 // is set.
4360 // The double arguments are stored to the VarArgsFrameIndex
4361 // on the stack.
4362 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4363 // Get an existing live-in vreg, or add a new one.
4364 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4365 if (!VReg)
4366 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4367
4368 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4369 SDValue Store =
4370 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4371 MemOps.push_back(Store);
4372 // Increment the address by eight for the next argument to store
4373 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4374 PtrVT);
4375 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4376 }
4377 }
4378
4379 if (!MemOps.empty())
4380 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4381
4382 return Chain;
4383}
4384
4385// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4386// value to MVT::i64 and then truncate to the correct register size.
4387SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4388 EVT ObjectVT, SelectionDAG &DAG,
4389 SDValue ArgVal,
4390 const SDLoc &dl) const {
4391 if (Flags.isSExt())
4392 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4393 DAG.getValueType(ObjectVT));
4394 else if (Flags.isZExt())
4395 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4396 DAG.getValueType(ObjectVT));
4397
4398 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4399}
4400
4401SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4402 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4403 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4404 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4405 // TODO: add description of PPC stack frame format, or at least some docs.
4406 //
4407 bool isELFv2ABI = Subtarget.isELFv2ABI();
4408 bool isLittleEndian = Subtarget.isLittleEndian();
4409 MachineFunction &MF = DAG.getMachineFunction();
4410 MachineFrameInfo &MFI = MF.getFrameInfo();
4411 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4412
4413 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4414 "fastcc not supported on varargs functions");
4415
4416 EVT PtrVT = getPointerTy(MF.getDataLayout());
4417 // Potential tail calls could cause overwriting of argument stack slots.
4418 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4419 (CallConv == CallingConv::Fast));
4420 unsigned PtrByteSize = 8;
4421 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4422
4423 static const MCPhysReg GPR[] = {
4424 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4425 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4426 };
4427 static const MCPhysReg VR[] = {
4428 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4429 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4430 };
4431
4432 const unsigned Num_GPR_Regs = std::size(GPR);
4433 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4434 const unsigned Num_VR_Regs = std::size(VR);
4435
4436 // Do a first pass over the arguments to determine whether the ABI
4437 // guarantees that our caller has allocated the parameter save area
4438 // on its stack frame. In the ELFv1 ABI, this is always the case;
4439 // in the ELFv2 ABI, it is true if this is a vararg function or if
4440 // any parameter is located in a stack slot.
4441
4442 bool HasParameterArea = !isELFv2ABI || isVarArg;
4443 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4444 unsigned NumBytes = LinkageSize;
4445 unsigned AvailableFPRs = Num_FPR_Regs;
4446 unsigned AvailableVRs = Num_VR_Regs;
4447 for (const ISD::InputArg &In : Ins) {
4448 if (In.Flags.isNest())
4449 continue;
4450
4451 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4452 LinkageSize, ParamAreaSize, NumBytes,
4453 AvailableFPRs, AvailableVRs))
4454 HasParameterArea = true;
4455 }
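// For instance (editor's note): under ELFv2, a non-vararg function taking
// nine i64 arguments has its ninth argument spill past the eight GPRs, so
// CalculateStackSlotUsed reports it and HasParameterArea becomes true.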
4456
4457 // Add DAG nodes to load the arguments or copy them out of registers. On
4458 // entry to a function on PPC, the arguments start after the linkage area,
4459 // although the first ones are often in registers.
4460
4461 unsigned ArgOffset = LinkageSize;
4462 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4463 SmallVector<SDValue, 8> MemOps;
4464 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4465 unsigned CurArgIdx = 0;
4466 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4467 SDValue ArgVal;
4468 bool needsLoad = false;
4469 EVT ObjectVT = Ins[ArgNo].VT;
4470 EVT OrigVT = Ins[ArgNo].ArgVT;
4471 unsigned ObjSize = ObjectVT.getStoreSize();
4472 unsigned ArgSize = ObjSize;
4473 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4474 if (Ins[ArgNo].isOrigArg()) {
4475 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4476 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4477 }
4478 // We re-align the argument offset for each argument, except under the fast
4479 // calling convention, where we re-align only when the argument will
4480 // actually use a stack slot.
4481 unsigned CurArgOffset;
4482 Align Alignment;
4483 auto ComputeArgOffset = [&]() {
4484 /* Respect alignment of argument on the stack. */
4485 Alignment =
4486 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4487 ArgOffset = alignTo(ArgOffset, Alignment);
4488 CurArgOffset = ArgOffset;
4489 };
4490
4491 if (CallConv != CallingConv::Fast) {
4492 ComputeArgOffset();
4493
4494 /* Compute GPR index associated with argument offset. */
4495 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4496 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4497 }
4498
4499 // FIXME the codegen can be much improved in some cases.
4500 // We do not have to keep everything in memory.
4501 if (Flags.isByVal()) {
4502 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4503
4504 if (CallConv == CallingConv::Fast)
4505 ComputeArgOffset();
4506
4507 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
4508 ObjSize = Flags.getByValSize();
4509 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4510 // Empty aggregate parameters do not take up registers. Examples:
4511 // struct { } a;
4512 // union { } b;
4513 // int c[0];
4514 // etc. However, we have to provide a place-holder in InVals, so
4515 // pretend we have an 8-byte item at the current address for that
4516 // purpose.
4517 if (!ObjSize) {
4518 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4519 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4520 InVals.push_back(FIN);
4521 continue;
4522 }
4523
4524 // Create a stack object covering all stack doublewords occupied
4525 // by the argument. If the argument is (fully or partially) on
4526 // the stack, or if the argument is fully in registers but the
4527 // caller has allocated the parameter save anyway, we can refer
4528 // directly to the caller's stack frame. Otherwise, create a
4529 // local copy in our own frame.
4530 int FI;
4531 if (HasParameterArea ||
4532 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4533 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4534 else
4535 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4536 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4537
4538 // Handle aggregates smaller than 8 bytes.
4539 if (ObjSize < PtrByteSize) {
4540 // The value of the object is its address, which differs from the
4541 // address of the enclosing doubleword on big-endian systems.
4542 SDValue Arg = FIN;
4543 if (!isLittleEndian) {
4544 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4545 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4546 }
4547 InVals.push_back(Arg);
4548
4549 if (GPR_idx != Num_GPR_Regs) {
4550 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4551 FuncInfo->addLiveInAttr(VReg, Flags);
4552 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4553 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4554 SDValue Store =
4555 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4556 MachinePointerInfo(&*FuncArg), ObjType);
4557 MemOps.push_back(Store);
4558 }
4559 // Whether we copied from a register or not, advance the offset
4560 // into the parameter save area by a full doubleword.
4561 ArgOffset += PtrByteSize;
4562 continue;
4563 }
4564
4565 // The value of the object is its address, which is the address of
4566 // its first stack doubleword.
4567 InVals.push_back(FIN);
4568
4569 // Store whatever pieces of the object are in registers to memory.
4570 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4571 if (GPR_idx == Num_GPR_Regs)
4572 break;
4573
4574 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4575 FuncInfo->addLiveInAttr(VReg, Flags);
4576 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4577 SDValue Addr = FIN;
4578 if (j) {
4579 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4580 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4581 }
4582 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4583 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4584 SDValue Store =
4585 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4586 MachinePointerInfo(&*FuncArg, j), ObjType);
4587 MemOps.push_back(Store);
4588 ++GPR_idx;
4589 }
4590 ArgOffset += ArgSize;
4591 continue;
4592 }
4593
4594 switch (ObjectVT.getSimpleVT().SimpleTy) {
4595 default: llvm_unreachable("Unhandled argument type!");
4596 case MVT::i1:
4597 case MVT::i32:
4598 case MVT::i64:
4599 if (Flags.isNest()) {
4600 // The 'nest' parameter, if any, is passed in R11.
4601 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4602 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4603
4604 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4605 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4606
4607 break;
4608 }
4609
4610 // These can be scalar arguments or elements of an integer array type
4611 // passed directly. Clang may use those instead of "byval" aggregate
4612 // types to avoid forcing arguments to memory unnecessarily.
4613 if (GPR_idx != Num_GPR_Regs) {
4614 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4615 FuncInfo->addLiveInAttr(VReg, Flags);
4616 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4617
4618 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4619 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4620 // value to MVT::i64 and then truncate to the correct register size.
4621 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4622 } else {
4623 if (CallConv == CallingConv::Fast)
4624 ComputeArgOffset();
4625
4626 needsLoad = true;
4627 ArgSize = PtrByteSize;
4628 }
4629 if (CallConv != CallingConv::Fast || needsLoad)
4630 ArgOffset += 8;
4631 break;
4632
4633 case MVT::f32:
4634 case MVT::f64:
4635 // These can be scalar arguments or elements of a float array type
4636 // passed directly. The latter are used to implement ELFv2 homogeneous
4637 // float aggregates.
4638 if (FPR_idx != Num_FPR_Regs) {
4639 unsigned VReg;
4640
4641 if (ObjectVT == MVT::f32)
4642 VReg = MF.addLiveIn(FPR[FPR_idx],
4643 Subtarget.hasP8Vector()
4644 ? &PPC::VSSRCRegClass
4645 : &PPC::F4RCRegClass);
4646 else
4647 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4648 ? &PPC::VSFRCRegClass
4649 : &PPC::F8RCRegClass);
4650
4651 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4652 ++FPR_idx;
4653 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4654 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4655 // once we support fp <-> gpr moves.
4656
4657 // This can only ever happen in the presence of f32 array types,
4658 // since otherwise we never run out of FPRs before running out
4659 // of GPRs.
4660 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4661 FuncInfo->addLiveInAttr(VReg, Flags);
4662 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4663
4664 if (ObjectVT == MVT::f32) {
4665 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4666 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4667 DAG.getConstant(32, dl, MVT::i32));
4668 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4669 }
4670
4671 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4672 } else {
4673 if (CallConv == CallingConv::Fast)
4674 ComputeArgOffset();
4675
4676 needsLoad = true;
4677 }
4678
4679 // When passing an array of floats, the array occupies consecutive
4680 // space in the argument area; only round up to the next doubleword
4681 // at the end of the array. Otherwise, each float takes 8 bytes.
4682 if (CallConv != CallingConv::Fast || needsLoad) {
4683 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4684 ArgOffset += ArgSize;
4685 if (Flags.isInConsecutiveRegsLast())
4686 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4687 }
4688 break;
4689 case MVT::v4f32:
4690 case MVT::v4i32:
4691 case MVT::v8i16:
4692 case MVT::v16i8:
4693 case MVT::v2f64:
4694 case MVT::v2i64:
4695 case MVT::v1i128:
4696 case MVT::f128:
4697 // These can be scalar arguments or elements of a vector array type
4698 // passed directly. The latter are used to implement ELFv2 homogeneous
4699 // vector aggregates.
4700 if (VR_idx != Num_VR_Regs) {
4701 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4702 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4703 ++VR_idx;
4704 } else {
4705 if (CallConv == CallingConv::Fast)
4706 ComputeArgOffset();
4707 needsLoad = true;
4708 }
4709 if (CallConv != CallingConv::Fast || needsLoad)
4710 ArgOffset += 16;
4711 break;
4712 }
4713
4714 // We need to load the argument to a virtual register if we determined
4715 // above that we ran out of physical registers of the appropriate type.
4716 if (needsLoad) {
4717 if (ObjSize < ArgSize && !isLittleEndian)
4718 CurArgOffset += ArgSize - ObjSize;
4719 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4720 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4721 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4722 }
4723
4724 InVals.push_back(ArgVal);
4725 }
4726
4727 // Area that is at least reserved in the caller of this function.
4728 unsigned MinReservedArea;
4729 if (HasParameterArea)
4730 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4731 else
4732 MinReservedArea = LinkageSize;
4733
4734 // Set the size that is at least reserved in caller of this function. Tail
4735 // call optimized functions' reserved stack space needs to be aligned so that
4736 // taking the difference between two stack areas will result in an aligned
4737 // stack.
4738 MinReservedArea =
4739 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4740 FuncInfo->setMinReservedArea(MinReservedArea);
4741
4742 // If the function takes a variable number of arguments, make a frame index for
4743 // the start of the first vararg value... for expansion of llvm.va_start.
4744 // The ELFv2 ABI spec notes:
4745 // C programs that are intended to be *portable* across different compilers
4746 // and architectures must use the header file <stdarg.h> to deal with variable
4747 // argument lists.
4748 if (isVarArg && MFI.hasVAStart()) {
4749 int Depth = ArgOffset;
4750
4751 FuncInfo->setVarArgsFrameIndex(
4752 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4753 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4754
4755 // If this function is vararg, store any remaining integer argument regs
4756 // to their spots on the stack so that they may be loaded by dereferencing
4757 // the result of va_next.
4758 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4759 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4760 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4761 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4762 SDValue Store =
4763 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4764 MemOps.push_back(Store);
4765 // Increment the address by eight for the next argument to store
4766 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4767 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4768 }
4769 }
4770
4771 if (!MemOps.empty())
4772 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4773
4774 return Chain;
4775}
4776
4777/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4778/// adjusted to accommodate the arguments for the tailcall.
4779static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4780 unsigned ParamSize) {
4781
4782 if (!isTailCall) return 0;
4783
4784 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4785 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4786 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4787 // Remember only if the new adjustment is bigger.
4788 if (SPDiff < FI->getTailCallSPDelta())
4789 FI->setTailCallSPDelta(SPDiff);
4790
4791 return SPDiff;
4792}
4793
4794static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4795
4796static bool callsShareTOCBase(const Function *Caller,
4797 const GlobalValue *CalleeGV,
4798 const TargetMachine &TM) {
4799 // It does not make sense to call callsShareTOCBase() with a caller that
4800 // is PC Relative since PC Relative callers do not have a TOC.
4801#ifndef NDEBUG
4802 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4803 assert(!STICaller->isUsingPCRelativeCalls() &&
4804 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4805#endif
4806
4807 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4808 // don't have enough information to determine if the caller and callee share
4809 // the same TOC base, so we have to pessimistically assume they don't for
4810 // correctness.
4811 if (!CalleeGV)
4812 return false;
4813
4814 // If the callee is preemptable, then the static linker will use a plt-stub
4815 // which saves the toc to the stack, and needs a nop after the call
4816 // instruction to convert to a toc-restore.
4817 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4818 return false;
4819
4820 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4821 // We may need a TOC restore in the situation where the caller requires a
4822 // valid TOC but the callee is PC Relative and does not.
4823 const Function *F = dyn_cast<Function>(CalleeGV);
4824 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4825
4826 // If we have an Alias we can try to get the function from there.
4827 if (Alias) {
4828 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4829 F = dyn_cast<Function>(GlobalObj);
4830 }
4831
4832 // If we still have no valid function pointer we do not have enough
4833 // information to determine if the callee uses PC Relative calls so we must
4834 // assume that it does.
4835 if (!F)
4836 return false;
4837
4838 // If the callee uses PC Relative we cannot guarantee that the callee won't
4839 // clobber the TOC of the caller and so we must assume that the two
4840 // functions do not share a TOC base.
4841 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4842 if (STICallee->isUsingPCRelativeCalls())
4843 return false;
4844
4845 // If the GV is not a strong definition then we need to assume it can be
4846 // replaced by another function at link time. The function that replaces
4847 // it may not share the same TOC as the caller since the callee may be
4848 // replaced by a PC Relative version of the same function.
4849 if (!CalleeGV->isStrongDefinitionForLinker())
4850 return false;
4851
4852 // The medium and large code models are expected to provide a sufficiently
4853 // large TOC to provide all data addressing needs of a module with a
4854 // single TOC.
4855 if (CodeModel::Medium == TM.getCodeModel() ||
4856 CodeModel::Large == TM.getCodeModel())
4857 return true;
4858
4859 // Any explicitly-specified sections and section prefixes must also match.
4860 // Also, if we're using -ffunction-sections, then each function is always in
4861 // a different section (the same is true for COMDAT functions).
4862 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4863 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4864 return false;
4865 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4866 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4867 return false;
4868 }
4869
4870 return true;
4871}
4872
4873static bool
4874 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4875 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4876 assert(Subtarget.is64BitELFABI());
4877
4878 const unsigned PtrByteSize = 8;
4879 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4880
4881 static const MCPhysReg GPR[] = {
4882 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4883 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4884 };
4885 static const MCPhysReg VR[] = {
4886 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4887 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4888 };
4889
4890 const unsigned NumGPRs = std::size(GPR);
4891 const unsigned NumFPRs = 13;
4892 const unsigned NumVRs = std::size(VR);
4893 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4894
4895 unsigned NumBytes = LinkageSize;
4896 unsigned AvailableFPRs = NumFPRs;
4897 unsigned AvailableVRs = NumVRs;
4898
4899 for (const ISD::OutputArg& Param : Outs) {
4900 if (Param.Flags.isNest()) continue;
4901
4902 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4903 LinkageSize, ParamAreaSize, NumBytes,
4904 AvailableFPRs, AvailableVRs))
4905 return true;
4906 }
4907 return false;
4908}
4909
4910static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4911 if (CB.arg_size() != CallerFn->arg_size())
4912 return false;
4913
4914 auto CalleeArgIter = CB.arg_begin();
4915 auto CalleeArgEnd = CB.arg_end();
4916 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4917
4918 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4919 const Value* CalleeArg = *CalleeArgIter;
4920 const Value* CallerArg = &(*CallerArgIter);
4921 if (CalleeArg == CallerArg)
4922 continue;
4923
4924 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4925 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4926 // }
4927 // 1st argument of callee is undef and has the same type as caller.
4928 if (CalleeArg->getType() == CallerArg->getType() &&
4929 isa<UndefValue>(CalleeArg))
4930 continue;
4931
4932 return false;
4933 }
4934
4935 return true;
4936}
4937
4938 // Returns true if TCO is possible between the caller's and callee's
4939 // calling conventions.
4940static bool
4941 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4942 CallingConv::ID CalleeCC) {
4943 // Tail calls are possible with fastcc and ccc.
4944 auto isTailCallableCC = [] (CallingConv::ID CC){
4945 return CC == CallingConv::C || CC == CallingConv::Fast;
4946 };
4947 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4948 return false;
4949
4950 // We can safely tail call both fastcc and ccc callees from a c calling
4951 // convention caller. If the caller is fastcc, we may have less stack space
4952 // than a non-fastcc caller with the same signature so disable tail-calls in
4953 // that case.
4954 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4955}
4956
4957bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4958 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
4959 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
4960 const SmallVectorImpl<ISD::OutputArg> &Outs,
4961 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
4962 bool isCalleeExternalSymbol) const {
4963 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4964
4965 if (DisableSCO && !TailCallOpt) return false;
4966
4967 // Variadic argument functions are not supported.
4968 if (isVarArg) return false;
4969
4970 // Check that the calling conventions are compatible for tco.
4971 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
4972 return false;
4973
4974 // A caller containing any byval parameter is not supported.
4975 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4976 return false;
4977
4978 // A callee containing any byval parameter is not supported either.
4979 // Note: This is a quick workaround, because in some cases, e.g.
4980 // caller's stack size > callee's stack size, we are still able to apply
4981 // sibling call optimization. For example, gcc is able to do SCO for caller1
4982 // in the following example, but not for caller2.
4983 // struct test {
4984 // long int a;
4985 // char ary[56];
4986 // } gTest;
4987 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4988 // b->a = v.a;
4989 // return 0;
4990 // }
4991 // void caller1(struct test a, struct test c, struct test *b) {
4992 // callee(gTest, b); }
4993 // void caller2(struct test *b) { callee(gTest, b); }
4994 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4995 return false;
4996
4997 // If callee and caller use different calling conventions, we cannot pass
4998 // parameters on stack since offsets for the parameter area may be different.
4999 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5000 return false;
5001
5002 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5003 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5004 // callee potentially have different TOC bases then we cannot tail call since
5005 // we need to restore the TOC pointer after the call.
5006 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5007 // We cannot guarantee this for indirect calls or calls to external functions.
5008 // When PC-Relative addressing is used, the concept of the TOC is no longer
5009 // applicable so this check is not required.
5010 // Check first for indirect calls.
5011 if (!Subtarget.isUsingPCRelativeCalls() &&
5012 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5013 return false;
5014
5015 // Check if we share the TOC base.
5016 if (!Subtarget.isUsingPCRelativeCalls() &&
5017 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5018 return false;
5019
5020 // TCO allows altering callee ABI, so we don't have to check further.
5021 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5022 return true;
5023
5024 if (DisableSCO) return false;
5025
5026 // If the callee uses the same argument list as the caller, then we can
5027 // apply SCO in this case. If not, we need to check whether the callee
5028 // needs stack slots for passing arguments.
5029 // PC Relative tail calls may not have a CallBase.
5030 // If there is no CallBase we cannot verify if we have the same argument
5031 // list so assume that we don't have the same argument list.
5032 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5033 needStackSlotPassParameters(Subtarget, Outs))
5034 return false;
5035 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5036 return false;
5037
5038 return true;
5039}
5040
5041/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5042/// for tail call optimization. Targets which want to do tail call
5043/// optimization should implement this function.
5044bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5045 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5046 CallingConv::ID CallerCC, bool isVarArg,
5047 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5048 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5049 return false;
5050
5051 // Variable argument functions are not supported.
5052 if (isVarArg)
5053 return false;
5054
5055 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5056 // Functions containing byval parameters are not supported.
5057 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5058 return false;
5059
5060 // Non-PIC/GOT tail calls are supported.
5061 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5062 return true;
5063
5064 // At the moment we can only do local tail calls (in same module, hidden
5065 // or protected) if we are generating PIC.
5066 if (CalleeGV)
5067 return CalleeGV->hasHiddenVisibility() ||
5068 CalleeGV->hasProtectedVisibility();
5069 }
5070
5071 return false;
5072}
5073
5074/// isCallCompatibleAddress - Return the immediate to use if the specified
5075/// 32-bit value is representable in the immediate field of a BxA instruction.
5076 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5077 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5078 if (!C) return nullptr;
5079
5080 int Addr = C->getZExtValue();
5081 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5082 SignExtend32<26>(Addr) != Addr)
5083 return nullptr; // Top 6 bits have to be sext of immediate.
5084
5085 return DAG
5086 .getConstant(
5087 (int)C->getZExtValue() >> 2, SDLoc(Op),
5088 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5089 .getNode();
5090}
5091
5092namespace {
5093
5094struct TailCallArgumentInfo {
5095 SDValue Arg;
5096 SDValue FrameIdxOp;
5097 int FrameIdx = 0;
5098
5099 TailCallArgumentInfo() = default;
5100};
5101
5102} // end anonymous namespace
5103
5104/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5105 static void StoreTailCallArgumentsToStackSlot(
5106 SelectionDAG &DAG, SDValue Chain,
5107 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5108 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5109 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5110 SDValue Arg = TailCallArgs[i].Arg;
5111 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5112 int FI = TailCallArgs[i].FrameIdx;
5113 // Store relative to framepointer.
5114 MemOpChains.push_back(DAG.getStore(
5115 Chain, dl, Arg, FIN,
5116 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5117 }
5118}
5119
5120/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5121/// the appropriate stack slot for the tail call optimized function call.
5122 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5123 SDValue OldRetAddr, SDValue OldFP,
5124 int SPDiff, const SDLoc &dl) {
5125 if (SPDiff) {
5126 // Calculate the new stack slot for the return address.
5127 MachineFunction &MF = DAG.getMachineFunction();
5128 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5129 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5130 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5131 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5132 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5133 NewRetAddrLoc, true);
5134 SDValue NewRetAddrFrIdx =
5135 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5136 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5137 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5138 }
5139 return Chain;
5140}
5141
5142/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5143/// the position of the argument.
5144 static void CalculateTailCallArgDest(
5145 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5146 int SPDiff, unsigned ArgOffset,
5147 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5148 int Offset = ArgOffset + SPDiff;
5149 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5150 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5151 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5152 SDValue FIN = DAG.getFrameIndex(FI, VT);
5153 TailCallArgumentInfo Info;
5154 Info.Arg = Arg;
5155 Info.FrameIdxOp = FIN;
5156 Info.FrameIdx = FI;
5157 TailCallArguments.push_back(Info);
5158}
5159
5160 /// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return
5161 /// address stack slot. Returns the chain as result and the loaded frame
5162 /// pointers in LROpOut/FPOpOut. Used when tail calling.
5163SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5164 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5165 SDValue &FPOpOut, const SDLoc &dl) const {
5166 if (SPDiff) {
5167 // Load the LR and FP stack slot for later adjusting.
5168 LROpOut = getReturnAddrFrameIndex(DAG);
5169 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5170 MachinePointerInfo());
5171 Chain = SDValue(LROpOut.getNode(), 1);
5172 }
5173 return Chain;
5174}
5175
5176/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5177/// by "Src" to address "Dst" of size "Size". Alignment information is
5178/// specified by the specific parameter attribute. The copy will be passed as
5179/// a byval function parameter.
5180/// Sometimes what we are copying is the end of a larger object, the part that
5181/// does not fit in registers.
5182 static SDValue CreateCopyOfByValArgument(SDValue Dst, SDValue Src,
5183 SDValue Chain, ISD::ArgFlagsTy Flags,
5184 SelectionDAG &DAG, const SDLoc &dl) {
5185 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5186 return DAG.getMemcpy(
5187 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5188 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5189}
5190
5191/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5192/// tail calls.
5193 static void LowerMemOpCallTo(
5194 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5195 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5196 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5197 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5198 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5199 if (!isTailCall) {
5200 if (isVector) {
5201 SDValue StackPtr;
5202 if (isPPC64)
5203 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5204 else
5205 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5206 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5207 DAG.getConstant(ArgOffset, dl, PtrVT));
5208 }
5209 MemOpChains.push_back(
5210 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5211 // Calculate and remember argument location.
5212 } else
5213 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5214 TailCallArguments);
5215}
5216
5217static void
5218 PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5219 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5220 SDValue FPOp,
5221 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5222 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5223 // might overwrite each other in case of tail call optimization.
5224 SmallVector<SDValue, 8> MemOpChains2;
5225 // Do not flag preceding copytoreg stuff together with the following stuff.
5226 InGlue = SDValue();
5227 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5228 MemOpChains2, dl);
5229 if (!MemOpChains2.empty())
5230 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5231
5232 // Store the return address to the appropriate stack slot.
5233 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5234
5235 // Emit callseq_end just before tailcall node.
5236 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5237 InGlue = Chain.getValue(1);
5238}
5239
5240// Is this global address that of a function that can be called by name (as
5241// opposed to something that must hold a descriptor for an indirect call)?
5242static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5243 if (GV) {
5244 if (GV->isThreadLocal())
5245 return false;
5246
5247 return GV->getValueType()->isFunctionTy();
5248 }
5249
5250 return false;
5251}
5252
5253SDValue PPCTargetLowering::LowerCallResult(
5254 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5255 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5256 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5257 SmallVector<CCValAssign, 16> RVLocs;
5258 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5259 *DAG.getContext());
5260
5261 CCRetInfo.AnalyzeCallResult(
5262 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5263 ? RetCC_PPC_Cold
5264 : RetCC_PPC);
5265
5266 // Copy all of the result registers out of their specified physreg.
5267 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5268 CCValAssign &VA = RVLocs[i];
5269 assert(VA.isRegLoc() && "Can only return in registers!");
5270
5271 SDValue Val;
5272
5273 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5274 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5275 InGlue);
5276 Chain = Lo.getValue(1);
5277 InGlue = Lo.getValue(2);
5278 VA = RVLocs[++i]; // skip ahead to next loc
5279 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5280 InGlue);
5281 Chain = Hi.getValue(1);
5282 InGlue = Hi.getValue(2);
5283 if (!Subtarget.isLittleEndian())
5284 std::swap(Lo, Hi);
5285 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5286 } else {
5287 Val = DAG.getCopyFromReg(Chain, dl,
5288 VA.getLocReg(), VA.getLocVT(), InGlue);
5289 Chain = Val.getValue(1);
5290 InGlue = Val.getValue(2);
5291 }
5292
5293 switch (VA.getLocInfo()) {
5294 default: llvm_unreachable("Unknown loc info!");
5295 case CCValAssign::Full: break;
5296 case CCValAssign::AExt:
5297 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5298 break;
5299 case CCValAssign::ZExt:
5300 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5301 DAG.getValueType(VA.getValVT()));
5302 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5303 break;
5304 case CCValAssign::SExt:
5305 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5306 DAG.getValueType(VA.getValVT()));
5307 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5308 break;
5309 }
5310
5311 InVals.push_back(Val);
5312 }
5313
5314 return Chain;
5315}
5316
5317static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5318 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5319 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5320 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5321
5322 // PatchPoint calls are not indirect.
5323 if (isPatchPoint)
5324 return false;
5325
5326 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5327 return false;
5328
5329 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5330 // because the immediate function pointer points to a descriptor instead of
5331 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5332 // pointer immediate points to the global entry point, while the BLA would
5333 // need to jump to the local entry point (see rL211174).
5334 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5335 isBLACompatibleAddress(Callee, DAG))
5336 return false;
5337
5338 return true;
5339}
5340
5341// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5342static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5343 return Subtarget.isAIXABI() ||
5344 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5345}
5346
5347 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5348 const Function &Caller, const SDValue &Callee,
5349 const PPCSubtarget &Subtarget,
5350 const TargetMachine &TM,
5351 bool IsStrictFPCall = false) {
5352 if (CFlags.IsTailCall)
5353 return PPCISD::TC_RETURN;
5354
5355 unsigned RetOpc = 0;
5356 // This is a call through a function pointer.
5357 if (CFlags.IsIndirect) {
5358 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5359 // indirect calls. The save of the caller's TOC pointer to the stack will be
5360 // inserted into the DAG as part of call lowering. The restore of the TOC
5361 // pointer is modeled by using a pseudo instruction for the call opcode that
5362 // represents the 2 instruction sequence of an indirect branch and link,
5363 // immediately followed by a load of the TOC pointer from the stack save
5364 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5365 // as it is not saved or used.
5366 if (Subtarget.usePointerGlueHelper())
5367 RetOpc = PPCISD::BL_LOAD_TOC;
5368 else
5369 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5370 : PPCISD::BCTRL;
5371 } else if (Subtarget.isUsingPCRelativeCalls()) {
5372 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5373 RetOpc = PPCISD::CALL_NOTOC;
5374 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5375 // The ABIs that maintain a TOC pointer across calls need to have a nop
5376 // immediately following the call instruction if the caller and callee may
5377 // have different TOC bases. At link time if the linker determines the calls
5378 // may not share a TOC base, the call is redirected to a trampoline inserted
5379 // by the linker. The trampoline will (among other things) save the caller's
5380 // TOC pointer at an ABI designated offset in the linkage area and the
5381 // linker will rewrite the nop to be a load of the TOC pointer from the
5382 // linkage area into gpr2.
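// As an illustrative sketch (assuming the standard ELF linkage-area layouts,
// where the TOC save doubleword is at 24(r1) on ELFv2 and 40(r1) on ELFv1),
// the caller-side sequence this describes looks roughly like:
//
//   bl callee      # may be redirected to a linker-inserted trampoline
//   nop            # rewritten by the linker to "ld r2, 24(r1)" (ELFv2)
//                  # when caller and callee do not share a TOC base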
5383 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5384 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5385 RetOpc =
5386 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5387 } else
5388 RetOpc = PPCISD::CALL;
5389 if (IsStrictFPCall) {
5390 switch (RetOpc) {
5391 default:
5392 llvm_unreachable("Unknown call opcode");
5393 case PPCISD::BCTRL_LOAD_TOC:
5394 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5395 break;
5396 case PPCISD::BCTRL:
5397 RetOpc = PPCISD::BCTRL_RM;
5398 break;
5399 case PPCISD::BL_LOAD_TOC:
5400 RetOpc = PPCISD::BL_LOAD_TOC_RM;
5401 break;
5402 case PPCISD::CALL_NOTOC:
5403 RetOpc = PPCISD::CALL_NOTOC_RM;
5404 break;
5405 case PPCISD::CALL:
5406 RetOpc = PPCISD::CALL_RM;
5407 break;
5408 case PPCISD::CALL_NOP:
5409 RetOpc = PPCISD::CALL_NOP_RM;
5410 break;
5411 }
5412 }
5413 return RetOpc;
5414}
5415
5416static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5417 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5418 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5419 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5420 return SDValue(Dest, 0);
5421
5422 // Returns true if the callee is local, and false otherwise.
5423 auto isLocalCallee = [&]() {
5424 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5425 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5426
5427 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5428 !isa_and_nonnull<GlobalIFunc>(GV);
5429 };
5430
5431 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5432 // a static relocation model causes some versions of GNU LD (2.17.50, at
5433 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5434 // built with secure-PLT.
5435 bool UsePlt =
5436 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5437 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5438
5439 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5440 const TargetMachine &TM = Subtarget.getTargetMachine();
5441 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5442 auto *S =
5443 static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5444
5445 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5446 return DAG.getMCSymbol(S, PtrVT);
5447 };
5448
5449 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5450 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5451 if (isFunctionGlobalAddress(GV)) {
5452 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5453
5454 if (Subtarget.isAIXABI()) {
5455 return getAIXFuncEntryPointSymbolSDNode(GV);
5456 }
5457 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5458 UsePlt ? PPCII::MO_PLT : 0);
5459 }
5460
5461 if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5462 const char *SymName = S->getSymbol();
5463 if (Subtarget.isAIXABI()) {
5464 // If there exists a user-declared function whose name is the same as the
5465 // ExternalSymbol's, then we pick up the user-declared version.
5466 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5467 if (const Function *F =
5468 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5469 return getAIXFuncEntryPointSymbolSDNode(F);
5470
5471 // On AIX, direct function calls reference the symbol for the function's
5472 // entry point, which is named by prepending a "." before the function's
5473 // C-linkage name. A Qualname is returned here because an external
5474 // function entry point is a csect with XTY_ER property.
5475 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5476 auto &Context = DAG.getMachineFunction().getContext();
5477 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5478 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5479 XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5480 return Sec->getQualNameSymbol();
5481 };
5482
5483 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5484 }
5485 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5486 UsePlt ? PPCII::MO_PLT : 0);
5487 }
5488
5489 // No transformation needed.
5490 assert(Callee.getNode() && "What no callee?");
5491 return Callee;
5492}
5493
5494 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5495 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5496 "Expected a CALLSEQ_STARTSDNode.");
5497
5498 // The last operand is the chain, except when the node has glue. If the node
5499 // has glue, then the last operand is the glue, and the chain is the second
5500 // last operand.
5501 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5502 if (LastValue.getValueType() != MVT::Glue)
5503 return LastValue;
5504
5505 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5506}
5507
5508// Creates the node that moves a function's address into the count register
5509// to prepare for an indirect call instruction.
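// As a rough sketch, the nodes built here correspond to the two-instruction
// sequence "mtctr rN; bctrl" (or "bctr" for a tail call), where rN is a
// hypothetical GPR holding the function's address.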
5510static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5511 SDValue &Glue, SDValue &Chain,
5512 const SDLoc &dl) {
5513 SDValue MTCTROps[] = {Chain, Callee, Glue};
5514 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5515 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5516 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5517 // The glue is the second value produced.
5518 Glue = Chain.getValue(1);
5519}
5520
5521 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5522 SDValue &Glue, SDValue &Chain,
5523 SDValue CallSeqStart,
5524 const CallBase *CB, const SDLoc &dl,
5525 bool hasNest,
5526 const PPCSubtarget &Subtarget) {
5527 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5528 // entry point, but to the function descriptor (the function entry point
5529 // address is part of the function descriptor though).
5530 // The function descriptor is a three doubleword structure with the
5531 // following fields: function entry point, TOC base address and
5532 // environment pointer.
5533 // Thus for a call through a function pointer, the following actions need
5534 // to be performed:
5535 // 1. Save the TOC of the caller in the TOC save area of its stack
5536 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5537 // 2. Load the address of the function entry point from the function
5538 // descriptor.
5539 // 3. Load the TOC of the callee from the function descriptor into r2.
5540 // 4. Load the environment pointer from the function descriptor into
5541 // r11.
5542 // 5. Branch to the function entry point address.
5543 // 6. On return of the callee, the TOC of the caller needs to be
5544 // restored (this is done in FinishCall()).
5545 //
5546 // The loads are scheduled at the beginning of the call sequence, and the
5547 // register copies are flagged together to ensure that no other
5548 // operations can be scheduled in between. E.g. without flagging the
5549 // copies together, a TOC access in the caller could be scheduled between
5550 // the assignment of the callee TOC and the branch to the callee, which leads
5551 // to incorrect code.
5552
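// An illustrative sketch, assuming the ELFv1 descriptor layout of three
// doublewords (entry point at 0, TOC anchor at 8, environment pointer at 16)
// and a hypothetical rDesc holding the descriptor's address, of the sequence
// the nodes below correspond to:
//
//   ld    r12, 0(rDesc)    # function entry point
//   ld    r2,  8(rDesc)    # callee's TOC anchor
//   ld    r11, 16(rDesc)   # environment pointer
//   mtctr r12
//   bctrl
//   ld    r2, 40(r1)       # restore caller's TOC from its save slot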
5553 // Start by loading the function address from the descriptor.
5554 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5555 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5556 ? (MachineMemOperand::MODereferenceable |
5557 MachineMemOperand::MOInvariant)
5558 : MachineMemOperand::MONone;
5559
5560 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5561
5562 // Registers used in building the DAG.
5563 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5564 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5565
5566 // Offsets of descriptor members.
5567 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5568 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5569
5570 const MVT RegVT = Subtarget.getScalarIntVT();
5571 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5572
5573 // One load for the function's entry point address.
5574 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5575 Alignment, MMOFlags);
5576
5577 // One for loading the TOC anchor for the module that contains the called
5578 // function.
5579 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5580 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5581 SDValue TOCPtr =
5582 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5583 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5584
5585 // One for loading the environment pointer.
5586 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5587 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5588 SDValue LoadEnvPtr =
5589 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5590 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5591
5592
5593 // Then copy the newly loaded TOC anchor to the TOC pointer.
5594 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5595 Chain = TOCVal.getValue(0);
5596 Glue = TOCVal.getValue(1);
5597
5598 // If the function call has an explicit 'nest' parameter, it takes the
5599 // place of the environment pointer.
5600 assert((!hasNest || !Subtarget.isAIXABI()) &&
5601 "Nest parameter is not supported on AIX.");
5602 if (!hasNest) {
5603 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5604 Chain = EnvVal.getValue(0);
5605 Glue = EnvVal.getValue(1);
5606 }
5607
5608 // The rest of the indirect call sequence is the same as the non-descriptor
5609 // DAG.
5610 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5611}
5612
5613 static void prepareOutOfLineGlueCall(SelectionDAG &DAG, SDValue &Callee,
5614 SDValue &Glue, SDValue &Chain,
5615 SDValue CallSeqStart, const CallBase *CB,
5616 const SDLoc &dl, bool hasNest,
5617 const PPCSubtarget &Subtarget) {
5618 // On AIX there is a feature ("out of line glue code") which uses a special
5619 // trampoline function ._ptrgl to do the indirect call. If this option is
5620 // enabled, we instead simply load the address of the descriptor into gpr11,
5621 // keep the arguments in the 'normal' registers, and branch to the ._ptrgl
5622 // stub.
5623 const MCRegister PtrGlueReg = Subtarget.getGlueCodeDescriptorRegister();
5624 SDValue MoveToPhysicalReg =
5625 DAG.getCopyToReg(Chain, dl, PtrGlueReg, Callee, Glue);
5626 Chain = MoveToPhysicalReg.getValue(0);
5627 Glue = MoveToPhysicalReg.getValue(1);
5628}
5629
5630static void
5631 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5632 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5633 SelectionDAG &DAG,
5634 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5635 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5636 const PPCSubtarget &Subtarget) {
5637 const bool IsPPC64 = Subtarget.isPPC64();
5638 // MVT for a general purpose register.
5639 const MVT RegVT = Subtarget.getScalarIntVT();
5640
5641 // First operand is always the chain.
5642 Ops.push_back(Chain);
5643
5644 // If it's a direct call, pass the callee as the second operand.
5645 if (!CFlags.IsIndirect)
5646 Ops.push_back(Callee);
5647 else if (Subtarget.usePointerGlueHelper()) {
5648 Ops.push_back(Callee);
5649 // Add the register used to pass the descriptor address.
5650 Ops.push_back(
5651 DAG.getRegister(Subtarget.getGlueCodeDescriptorRegister(), RegVT));
5652 } else {
5653 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5654
5655 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5656 // on the stack (this would have been done in `LowerCall_64SVR4` or
5657 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5658 // represents both the indirect branch and a load that restores the TOC
5659 // pointer from the linkage area. The operand for the TOC restore is an add
5660 // of the TOC save offset to the stack pointer. This must be the second
5661 // operand: after the chain input but before any other variadic arguments.
5662 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5663 // saved or used.
5664 if (isTOCSaveRestoreRequired(Subtarget)) {
5665 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5666
5667 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5668 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5669 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5670 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5671 Ops.push_back(AddTOC);
5672 }
5673
5674 // Add the register used for the environment pointer.
5675 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5676 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5677 RegVT));
5678
5679
5680 // Add CTR register as callee so a bctr can be emitted later.
5681 if (CFlags.IsTailCall)
5682 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5683 }
5684
5685 // If this is a tail call add stack pointer delta.
5686 if (CFlags.IsTailCall)
5687 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5688
5689 // Add argument registers to the end of the list so that they are known live
5690 // into the call.
5691 for (const auto &[Reg, N] : RegsToPass)
5692 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
5693
5694 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5695 // no way to mark dependencies as implicit here.
5696 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5697 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5698 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5699 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5700
5701 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5702 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5703 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5704
5705 // Add a register mask operand representing the call-preserved registers.
5706 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5707 const uint32_t *Mask =
5708 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5709 assert(Mask && "Missing call preserved mask for calling convention");
5710 Ops.push_back(DAG.getRegisterMask(Mask));
5711
5712 // If the glue is valid, it is the last operand.
5713 if (Glue.getNode())
5714 Ops.push_back(Glue);
5715}
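// For reference, a sketch of the operand list buildCallOperands produces for
// a 64-bit ELFv1 indirect, non-tail, non-vararg call (other configurations
// drop or add entries as the branches above show):
//
//   { Chain, ADD(X1, TOCSaveOffset), X11, <argument registers...>, X2,
//     <register mask>, Glue }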
5716
5717SDValue PPCTargetLowering::FinishCall(
5718 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5719 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5720 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5721 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5722 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5723
5724 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5725 Subtarget.isAIXABI())
5726 setUsesTOCBasePtr(DAG);
5727
5728 unsigned CallOpc =
5729 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5730 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5731
5732 if (!CFlags.IsIndirect)
5733 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5734 else if (Subtarget.usesFunctionDescriptors()) {
5735 if (Subtarget.usePointerGlueHelper()) {
5736 prepareOutOfLineGlueCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, dl,
5737 CFlags.HasNest, Subtarget);
5738 SDValue PtrGlueCallee =
5739 DAG.getExternalSymbol("_ptrgl", getPointerTy(DAG.getDataLayout()));
5740 Callee = transformCallee(PtrGlueCallee, DAG, dl, Subtarget);
5741 } else {
5742 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5743 dl, CFlags.HasNest, Subtarget);
5744 }
5745 } else {
5746 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5747 }
5748
5749 // Build the operand list for the call instruction.
5751 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5752 SPDiff, Subtarget);
5753
5754 // Emit tail call.
5755 if (CFlags.IsTailCall) {
5756 // Indirect tail calls when using PC Relative calls do not have the same
5757 // constraints.
5758 assert(((Callee.getOpcode() == ISD::Register &&
5759 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5760 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5761 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5762 isa<ConstantSDNode>(Callee) ||
5763 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5764 "Expecting a global address, external symbol, absolute value, "
5765 "register or an indirect tail call when PC Relative calls are "
5766 "used.");
5767 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5768 assert(CallOpc == PPCISD::TC_RETURN &&
5769 "Unexpected call opcode for a tail call.");
5770 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5771 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5772 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5773 return Ret;
5774 }
5775
5776 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5777 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5778 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5779 Glue = Chain.getValue(1);
5780
5781 // When performing tail call optimization the callee pops its arguments off
5782 // the stack. Account for this here so these bytes can be pushed back on in
5783 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5784 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5785 getTargetMachine().Options.GuaranteedTailCallOpt)
5786 ? NumBytes
5787 : 0;
5788
5789 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5790 Glue = Chain.getValue(1);
5791
5792 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5793 DAG, InVals);
5794}
5795
5796 bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5797 CallingConv::ID CalleeCC = CB->getCallingConv();
5798 const Function *CallerFunc = CB->getCaller();
5799 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5800 const Function *CalleeFunc = CB->getCalledFunction();
5801 if (!CalleeFunc)
5802 return false;
5803 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5804
5805 SmallVector<ISD::OutputArg, 2> Outs;
5806 SmallVector<ISD::InputArg, 2> Ins;
5807
5808 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5809 CalleeFunc->getAttributes(), Outs, *this,
5810 CalleeFunc->getDataLayout());
5811
5812 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5813 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5814 false /*isCalleeExternalSymbol*/);
5815}
5816
5817bool PPCTargetLowering::isEligibleForTCO(
5818 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5819 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5820 const SmallVectorImpl<ISD::OutputArg> &Outs,
5821 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5822 bool isCalleeExternalSymbol) const {
5823 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5824 return false;
5825
5826 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5827 return IsEligibleForTailCallOptimization_64SVR4(
5828 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5829 isCalleeExternalSymbol);
5830 else
5831 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5832 isVarArg, Ins);
5833}
5834
5835SDValue
5836PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5837 SmallVectorImpl<SDValue> &InVals) const {
5838 SelectionDAG &DAG = CLI.DAG;
5839 SDLoc &dl = CLI.DL;
5840 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5841 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5842 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5843 SDValue Chain = CLI.Chain;
5844 SDValue Callee = CLI.Callee;
5845 bool &isTailCall = CLI.IsTailCall;
5846 CallingConv::ID CallConv = CLI.CallConv;
5847 bool isVarArg = CLI.IsVarArg;
5848 bool isPatchPoint = CLI.IsPatchPoint;
5849 const CallBase *CB = CLI.CB;
5850
5851 if (isTailCall) {
5852 MachineFunction &MF = DAG.getMachineFunction();
5853 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5854 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5855 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5856 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5857
5858 isTailCall =
5859 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5860 &(MF.getFunction()), IsCalleeExternalSymbol);
5861 if (isTailCall) {
5862 ++NumTailCalls;
5863 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5864 ++NumSiblingCalls;
5865
5866 // PC Relative calls no longer guarantee that the callee is a Global
5867 // Address Node. The callee could be an indirect tail call in which
5868 // case the SDValue for the callee could be a load (to load the address
5869 // of a function pointer) or it may be a register copy (to move the
5870 // address of the callee from a function parameter into a virtual
5871 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5872 assert((Subtarget.isUsingPCRelativeCalls() ||
5873 isa<GlobalAddressSDNode>(Callee)) &&
5874 "Callee should be an llvm::Function object.");
5875
5876 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5877 << "\nTCO callee: ");
5878 LLVM_DEBUG(Callee.dump());
5879 }
5880 }
5881
5882 if (!isTailCall && CB && CB->isMustTailCall())
5883 report_fatal_error("failed to perform tail call elimination on a call "
5884 "site marked musttail");
5885
5886 // When long calls (i.e. indirect calls) are always used, calls are always
5887 // made via function pointer. If we have a function name, first translate it
5888 // into a pointer.
5889 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5890 !isTailCall)
5891 Callee = LowerGlobalAddress(Callee, DAG);
5892
5893 CallFlags CFlags(
5894 CallConv, isTailCall, isVarArg, isPatchPoint,
5895 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5896 // hasNest
5897 Subtarget.is64BitELFABI() &&
5898 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5899 CLI.NoMerge);
5900
5901 if (Subtarget.isAIXABI())
5902 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5903 InVals, CB);
5904
5905 assert(Subtarget.isSVR4ABI());
5906 if (Subtarget.isPPC64())
5907 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5908 InVals, CB);
5909 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5910 InVals, CB);
5911}
5912
5913SDValue PPCTargetLowering::LowerCall_32SVR4(
5914 SDValue Chain, SDValue Callee, CallFlags CFlags,
5915 const SmallVectorImpl<ISD::OutputArg> &Outs,
5916 const SmallVectorImpl<SDValue> &OutVals,
5917 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5918 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5919 const CallBase *CB) const {
5920 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5921 // of the 32-bit SVR4 ABI stack frame layout.
5922
5923 const CallingConv::ID CallConv = CFlags.CallConv;
5924 const bool IsVarArg = CFlags.IsVarArg;
5925 const bool IsTailCall = CFlags.IsTailCall;
5926
5927 assert((CallConv == CallingConv::C ||
5928 CallConv == CallingConv::Cold ||
5929 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5930
5931 const Align PtrAlign(4);
5932
5933 MachineFunction &MF = DAG.getMachineFunction();
5934
5935 // Mark this function as potentially containing a tail call. As a
5936 // consequence, the frame pointer will be used for dynamic alloca and for
5937 // restoring the caller's stack pointer in this function's epilogue. This is
5938 // done because the tail-called function might overwrite the value in this
5939 // function's (MF) stack pointer stack slot 0(SP).
5940 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5941 CallConv == CallingConv::Fast)
5942 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5943
5944 // Count how many bytes are to be pushed on the stack, including the linkage
5945 // area, the parameter list area, and the part of the local variable space
5946 // that holds copies of aggregates passed by value.
5947
5948 // Assign locations to all of the outgoing arguments.
5949 SmallVector<CCValAssign, 16> ArgLocs;
5950 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5951
5952 // Reserve space for the linkage area on the stack.
5953 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5954 PtrAlign);
5955
5956 if (IsVarArg) {
5957 // Handle fixed and variable vector arguments differently.
5958 // Fixed vector arguments go into registers as long as registers are
5959 // available. Variable vector arguments always go into memory.
5960 unsigned NumArgs = Outs.size();
5961
5962 for (unsigned i = 0; i != NumArgs; ++i) {
5963 MVT ArgVT = Outs[i].VT;
5964 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5965 bool Result;
5966
5967 if (!ArgFlags.isVarArg()) {
5968 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5969 Outs[i].OrigTy, CCInfo);
5970 } else {
5971 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5972 ArgFlags, Outs[i].OrigTy, CCInfo);
5973 }
5974
5975 if (Result) {
5976#ifndef NDEBUG
5977 errs() << "Call operand #" << i << " has unhandled type "
5978 << ArgVT << "\n";
5979#endif
5980 llvm_unreachable(nullptr);
5981 }
5982 }
5983 } else {
5984 // All arguments are treated the same.
5985 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5986 }
5987
5988 // Assign locations to all of the outgoing aggregate by value arguments.
5989 SmallVector<CCValAssign, 16> ByValArgLocs;
5990 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5991
5992 // Reserve stack space for the allocations in CCInfo.
5993 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
5994
5995 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5996
5997 // Size of the linkage area, the parameter list area, and the part of the
5998 // local variable space where copies of aggregates passed by value are
5999 // stored.
6000 unsigned NumBytes = CCByValInfo.getStackSize();
6001
6002 // Calculate by how many bytes the stack has to be adjusted in case of tail
6003 // call optimization.
6004 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6005
6006 // Adjust the stack pointer for the new arguments...
6007 // These operations are automatically eliminated by the prolog/epilog pass
6008 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6009 SDValue CallSeqStart = Chain;
6010
6011 // Load the return address and frame pointer so they can be moved somewhere
6012 // else later.
6013 SDValue LROp, FPOp;
6014 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6015
6016 // Set up a copy of the stack pointer for use loading and storing any
6017 // arguments that may not fit in the registers available for argument
6018 // passing.
6019 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6020
6022 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6023 SmallVector<SDValue, 8> MemOpChains;
6024
6025 bool seenFloatArg = false;
6026 // Walk the register/memloc assignments, inserting copies/loads.
6027 // i - Tracks the index into the list of registers allocated for the call
6028 // RealArgIdx - Tracks the index into the list of actual function arguments
6029 // j - Tracks the index into the list of byval arguments
6030 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6031 i != e;
6032 ++i, ++RealArgIdx) {
6033 CCValAssign &VA = ArgLocs[i];
6034 SDValue Arg = OutVals[RealArgIdx];
6035 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6036
6037 if (Flags.isByVal()) {
6038 // Argument is an aggregate which is passed by value, thus we need to
6039 // create a copy of it in the local variable space of the current stack
6040 // frame (which is the stack frame of the caller) and pass the address of
6041 // this copy to the callee.
6042 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6043 CCValAssign &ByValVA = ByValArgLocs[j++];
6044 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6045
6046 // Memory reserved in the local variable space of the caller's stack frame.
6047 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6048
6049 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6050 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6051 StackPtr, PtrOff);
6052
6053 // Create a copy of the argument in the local area of the current
6054 // stack frame.
6055 SDValue MemcpyCall =
6056 CreateCopyOfByValArgument(Arg, PtrOff,
6057 CallSeqStart.getNode()->getOperand(0),
6058 Flags, DAG, dl);
6059
6060 // This must go outside the CALLSEQ_START..END.
6061 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6062 SDLoc(MemcpyCall));
6063 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6064 NewCallSeqStart.getNode());
6065 Chain = CallSeqStart = NewCallSeqStart;
6066
6067 // Pass the address of the aggregate copy on the stack either in a
6068 // physical register or in the parameter list area of the current stack
6069 // frame to the callee.
6070 Arg = PtrOff;
6071 }
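// As a concrete sketch: for a hypothetical "struct S { int a[4]; } s;"
// passed to "f(s)", the code above memcpys s into the caller's local
// variable space and passes the copy's address, so the callee can modify
// its by-value parameter without affecting s.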
6072
6073 // When useCRBits() is true, there can be i1 arguments.
6074 // This is because getRegisterType(MVT::i1) => MVT::i1,
6075 // while for other integer types getRegisterType() => MVT::i32.
6076 // Extend i1 values here to ensure the callee gets an i32.
6077 if (Arg.getValueType() == MVT::i1)
6078 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6079 dl, MVT::i32, Arg);
6080
6081 if (VA.isRegLoc()) {
6082 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6083 // Put argument in a physical register.
6084 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6085 bool IsLE = Subtarget.isLittleEndian();
6086 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6087 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6088 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6089 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6090 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6091 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6092 SVal.getValue(0)));
6093 } else
6094 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6095 } else {
6096 // Put argument in the parameter list area of the current stack frame.
6097 assert(VA.isMemLoc());
6098 unsigned LocMemOffset = VA.getLocMemOffset();
6099
6100 if (!IsTailCall) {
6101 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6102 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6103 StackPtr, PtrOff);
6104
6105 MemOpChains.push_back(
6106 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6107 } else {
6108 // Calculate and remember argument location.
6109 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6110 TailCallArguments);
6111 }
6112 }
6113 }
6114
6115 if (!MemOpChains.empty())
6116 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6117
6118 // Build a sequence of copy-to-reg nodes chained together with token chain
6119 // and flag operands which copy the outgoing args into the appropriate regs.
6120 SDValue InGlue;
6121 for (const auto &[Reg, N] : RegsToPass) {
6122 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6123 InGlue = Chain.getValue(1);
6124 }
6125
6126 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6127 // registers.
6128 if (IsVarArg) {
6129 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6130 SDValue Ops[] = { Chain, InGlue };
6131
6132 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6133 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6134
6135 InGlue = Chain.getValue(1);
6136 }
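// Sketch of the convention encoded above: for a 32-bit SVR4 vararg call such
// as printf("%f", x), the call sequence ends with "creqv 6, 6, 6" (CR6SET)
// before the bl; with no floating-point arguments in registers it would be
// "crxor 6, 6, 6" (CR6UNSET) instead.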
6137
6138 if (IsTailCall)
6139 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6140 TailCallArguments);
6141
6142 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6143 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6144}
6145
6146// Copy an argument into memory, being careful to do this outside the
6147// call sequence for the call to which the argument belongs.
6148SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6149 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6150 SelectionDAG &DAG, const SDLoc &dl) const {
6151 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6152 CallSeqStart.getNode()->getOperand(0),
6153 Flags, DAG, dl);
6154 // The MEMCPY must go outside the CALLSEQ_START..END.
6155 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6156 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6157 SDLoc(MemcpyCall));
6158 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6159 NewCallSeqStart.getNode());
6160 return NewCallSeqStart;
6161}
6162
6163SDValue PPCTargetLowering::LowerCall_64SVR4(
6164 SDValue Chain, SDValue Callee, CallFlags CFlags,
6165 const SmallVectorImpl<ISD::OutputArg> &Outs,
6166 const SmallVectorImpl<SDValue> &OutVals,
6167 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6168 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6169 const CallBase *CB) const {
6170 bool isELFv2ABI = Subtarget.isELFv2ABI();
6171 bool isLittleEndian = Subtarget.isLittleEndian();
6172 unsigned NumOps = Outs.size();
6173 bool IsSibCall = false;
6174 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6175
6176 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6177 unsigned PtrByteSize = 8;
6178
6179 MachineFunction &MF = DAG.getMachineFunction();
6180
6181 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6182 IsSibCall = true;
6183
6184 // Mark this function as potentially containing a tail call. As a
6185 // consequence, the frame pointer will be used for dynamic alloca and for
6186 // restoring the caller's stack pointer in this function's epilogue. This is
6187 // done because the tail-called function might overwrite the value in this
6188 // function's (MF) stack pointer stack slot 0(SP).
6189 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6190 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6191
6192 assert(!(IsFastCall && CFlags.IsVarArg) &&
6193 "fastcc not supported on varargs functions");
6194
6195 // Count how many bytes are to be pushed on the stack, including the linkage
6196 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6197 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6198 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
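// As a reference sketch of those layouts (doubleword offsets from the stack
// pointer):
//
//   ELFv1: 0 back chain | 8 CR save | 16 LR save | 24, 32 reserved | 40 TOC
//   ELFv2: 0 back chain | 8 CR save | 16 LR save | 24 TOC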
6199 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6200 unsigned NumBytes = LinkageSize;
6201 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6202
6203 static const MCPhysReg GPR[] = {
6204 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6205 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6206 };
6207 static const MCPhysReg VR[] = {
6208 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6209 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6210 };
6211
6212 const unsigned NumGPRs = std::size(GPR);
6213 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6214 const unsigned NumVRs = std::size(VR);
6215
6216 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6217 // can be passed to the callee in registers.
6218 // For the fast calling convention, there is another check below.
6219 // Note: Keep this consistent with LowerFormalArguments_64SVR4().
6220 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6221 if (!HasParameterArea) {
6222 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6223 unsigned AvailableFPRs = NumFPRs;
6224 unsigned AvailableVRs = NumVRs;
6225 unsigned NumBytesTmp = NumBytes;
6226 for (unsigned i = 0; i != NumOps; ++i) {
6227 if (Outs[i].Flags.isNest()) continue;
6228 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6229 PtrByteSize, LinkageSize, ParamAreaSize,
6230 NumBytesTmp, AvailableFPRs, AvailableVRs))
6231 HasParameterArea = true;
6232 }
6233 }
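// Sketch: for a hypothetical callee "long f(long a, long b, double d)", all
// three arguments fit in registers (X3, X4 and F1), so on ELFv2 the loop
// above leaves HasParameterArea false and the caller reserves only the
// 32-byte linkage area rather than linkage plus a 64-byte parameter area.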
6234
6235 // When using the fast calling convention, we don't provide backing for
6236 // arguments that will be in registers.
6237 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6238
6239 // Avoid allocating parameter area for fastcc functions if all the arguments
6240 // can be passed in the registers.
6241 if (IsFastCall)
6242 HasParameterArea = false;
6243
6244 // Add up all the space actually used.
6245 for (unsigned i = 0; i != NumOps; ++i) {
6246 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6247 EVT ArgVT = Outs[i].VT;
6248 EVT OrigVT = Outs[i].ArgVT;
6249
6250 if (Flags.isNest())
6251 continue;
6252
6253 if (IsFastCall) {
6254 if (Flags.isByVal()) {
6255 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6256 if (NumGPRsUsed > NumGPRs)
6257 HasParameterArea = true;
6258 } else {
6259 switch (ArgVT.getSimpleVT().SimpleTy) {
6260 default: llvm_unreachable("Unexpected ValueType for argument!");
6261 case MVT::i1:
6262 case MVT::i32:
6263 case MVT::i64:
6264 if (++NumGPRsUsed <= NumGPRs)
6265 continue;
6266 break;
6267 case MVT::v4i32:
6268 case MVT::v8i16:
6269 case MVT::v16i8:
6270 case MVT::v2f64:
6271 case MVT::v2i64:
6272 case MVT::v1i128:
6273 case MVT::f128:
6274 if (++NumVRsUsed <= NumVRs)
6275 continue;
6276 break;
6277 case MVT::v4f32:
6278 if (++NumVRsUsed <= NumVRs)
6279 continue;
6280 break;
6281 case MVT::f32:
6282 case MVT::f64:
6283 if (++NumFPRsUsed <= NumFPRs)
6284 continue;
6285 break;
6286 }
6287 HasParameterArea = true;
6288 }
6289 }
6290
6291 /* Respect alignment of argument on the stack. */
6292 auto Alignment =
6293 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6294 NumBytes = alignTo(NumBytes, Alignment);
6295
6296 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6297 if (Flags.isInConsecutiveRegsLast())
6298 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6299 }
6300
6301 unsigned NumBytesActuallyUsed = NumBytes;
6302
6303 // In the old ELFv1 ABI,
6304 // the prolog code of the callee may store up to 8 GPR argument registers to
6305 // the stack, allowing va_start to index over them in memory if it is varargs.
6306 // Because we cannot tell if this is needed on the caller side, we have to
6307 // conservatively assume that it is needed. As such, make sure we have at
6308 // least enough stack space for the caller to store the 8 GPRs.
6309 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6310 // really requires memory operands, e.g. a vararg function.
6311 if (HasParameterArea)
6312 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6313 else
6314 NumBytes = LinkageSize;
6315
6316 // Tail call needs the stack to be aligned.
6317 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6318 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6319
6320 int SPDiff = 0;
6321
6322 // Calculate by how many bytes the stack has to be adjusted in case of tail
6323 // call optimization.
6324 if (!IsSibCall)
6325 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6326
6327 // To protect arguments on the stack from being clobbered in a tail call,
6328 // force all the loads to happen before doing any other lowering.
6329 if (CFlags.IsTailCall)
6330 Chain = DAG.getStackArgumentTokenFactor(Chain);
6331
6332 // Adjust the stack pointer for the new arguments...
6333 // These operations are automatically eliminated by the prolog/epilog pass
6334 if (!IsSibCall)
6335 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6336 SDValue CallSeqStart = Chain;
6337
6338 // Load the return address and frame pointer so they can be moved somewhere
6339 // else later.
6340 SDValue LROp, FPOp;
6341 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6342
6343 // Set up a copy of the stack pointer for use loading and storing any
6344 // arguments that may not fit in the registers available for argument
6345 // passing.
6346 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6347
6348 // Figure out which arguments are going to go in registers, and which in
6349 // memory. Also, if this is a vararg function, floating point operations
6350 // must be stored to our stack, and loaded into integer regs as well, if
6351 // any integer regs are available for argument passing.
6352 unsigned ArgOffset = LinkageSize;
6353
6355 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6356
6357 SmallVector<SDValue, 8> MemOpChains;
6358 for (unsigned i = 0; i != NumOps; ++i) {
6359 SDValue Arg = OutVals[i];
6360 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6361 EVT ArgVT = Outs[i].VT;
6362 EVT OrigVT = Outs[i].ArgVT;
6363
6364 // PtrOff will be used to store the current argument to the stack if a
6365 // register cannot be found for it.
6366 SDValue PtrOff;
6367
6368 // We re-align the argument offset for each argument, except under the fast
6369 // calling convention, where we do so only when the argument will actually
6370 // use a stack slot.
6371 auto ComputePtrOff = [&]() {
6372 /* Respect alignment of argument on the stack. */
6373 auto Alignment =
6374 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6375 ArgOffset = alignTo(ArgOffset, Alignment);
6376
6377 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6378
6379 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6380 };
6381
6382 if (!IsFastCall) {
6383 ComputePtrOff();
6384
6385 /* Compute GPR index associated with argument offset. */
6386 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6387 GPR_idx = std::min(GPR_idx, NumGPRs);
6388 }
6389
6390 // Promote integers to 64-bit values.
6391 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6392 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6393 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6394 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6395 }
6396
6397 // FIXME memcpy is used way more than necessary. Correctness first.
6398 // Note: "by value" is code for passing a structure by value, not
6399 // basic types.
6400 if (Flags.isByVal()) {
6401 // Note: Size includes alignment padding, so
6402 // struct x { short a; char b; }
6403 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6404 // These are the proper values we need for right-justifying the
6405 // aggregate in a parameter register.
6406 unsigned Size = Flags.getByValSize();
6407
6408 // An empty aggregate parameter takes up no storage and no
6409 // registers.
6410 if (Size == 0)
6411 continue;
6412
6413 if (IsFastCall)
6414 ComputePtrOff();
6415
6416 // All aggregates smaller than 8 bytes must be passed right-justified.
6417 if (Size==1 || Size==2 || Size==4) {
6418 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6419 if (GPR_idx != NumGPRs) {
6420 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6421 MachinePointerInfo(), VT);
6422 MemOpChains.push_back(Load.getValue(1));
6423 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6424
6425 ArgOffset += PtrByteSize;
6426 continue;
6427 }
6428 }
6429
6430 if (GPR_idx == NumGPRs && Size < 8) {
6431 SDValue AddPtr = PtrOff;
6432 if (!isLittleEndian) {
6433 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6434 PtrOff.getValueType());
6435 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6436 }
6437 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6438 CallSeqStart,
6439 Flags, DAG, dl);
6440 ArgOffset += PtrByteSize;
6441 continue;
6442 }
6443 // Copy the object to the parameter save area if it cannot be entirely
6444 // passed in registers.
6445 // FIXME: we only need to copy the parts which need to be passed in
6446 // parameter save area. For the parts passed by registers, we don't need
6447 // to copy them to the stack although we need to allocate space for them
6448 // in parameter save area.
6449 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6450 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6451 CallSeqStart,
6452 Flags, DAG, dl);
6453
6454 // When a register is available, pass a small aggregate right-justified.
6455 if (Size < 8 && GPR_idx != NumGPRs) {
6456 // The easiest way to get this right-justified in a register
6457 // is to copy the structure into the rightmost portion of a
6458 // local variable slot, then load the whole slot into the
6459 // register.
6460 // FIXME: The memcpy seems to produce pretty awful code for
6461 // small aggregates, particularly for packed ones.
6462 // FIXME: It would be preferable to use the slot in the
6463 // parameter save area instead of a new local variable.
6464 SDValue AddPtr = PtrOff;
6465 if (!isLittleEndian) {
6466 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6467 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6468 }
6469 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6470 CallSeqStart,
6471 Flags, DAG, dl);
6472
6473 // Load the slot into the register.
6474 SDValue Load =
6475 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6476 MemOpChains.push_back(Load.getValue(1));
6477 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6478
6479 // Done with this argument.
6480 ArgOffset += PtrByteSize;
6481 continue;
6482 }
6483
6484 // For aggregates larger than PtrByteSize, copy the pieces of the
6485 // object that fit into registers from the parameter save area.
6486 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6487 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6488 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6489 if (GPR_idx != NumGPRs) {
6490 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6491 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6492 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6493 MachinePointerInfo(), ObjType);
6494
6495 MemOpChains.push_back(Load.getValue(1));
6496 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6497 ArgOffset += PtrByteSize;
6498 } else {
6499 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6500 break;
6501 }
6502 }
6503 continue;
6504 }
6505
6506 switch (Arg.getSimpleValueType().SimpleTy) {
6507 default: llvm_unreachable("Unexpected ValueType for argument!");
6508 case MVT::i1:
6509 case MVT::i32:
6510 case MVT::i64:
6511 if (Flags.isNest()) {
6512 // The 'nest' parameter, if any, is passed in R11.
6513 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6514 break;
6515 }
6516
6517 // These can be scalar arguments or elements of an integer array type
6518 // passed directly. Clang may use those instead of "byval" aggregate
6519 // types to avoid forcing arguments to memory unnecessarily.
6520 if (GPR_idx != NumGPRs) {
6521 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6522 } else {
6523 if (IsFastCall)
6524 ComputePtrOff();
6525
6526 assert(HasParameterArea &&
6527 "Parameter area must exist to pass an argument in memory.");
6528 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6529 true, CFlags.IsTailCall, false, MemOpChains,
6530 TailCallArguments, dl);
6531 if (IsFastCall)
6532 ArgOffset += PtrByteSize;
6533 }
6534 if (!IsFastCall)
6535 ArgOffset += PtrByteSize;
6536 break;
6537 case MVT::f32:
6538 case MVT::f64: {
6539 // These can be scalar arguments or elements of a float array type
6540 // passed directly. The latter are used to implement ELFv2 homogenous
6541 // float aggregates.
6542
6543 // Named arguments go into FPRs first, and once they overflow, the
6544 // remaining arguments go into GPRs and then the parameter save area.
6545 // Unnamed arguments for vararg functions always go to GPRs and
6546 // then the parameter save area. For now, put all arguments to vararg
6547 // routines always in both locations (FPR *and* GPR or stack slot).
6548 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6549 bool NeededLoad = false;
6550
6551 // First load the argument into the next available FPR.
6552 if (FPR_idx != NumFPRs)
6553 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6554
6555 // Next, load the argument into GPR or stack slot if needed.
6556 if (!NeedGPROrStack)
6557 ;
6558 else if (GPR_idx != NumGPRs && !IsFastCall) {
6559 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6560 // once we support fp <-> gpr moves.
6561
6562 // In the non-vararg case, this can only ever happen in the
6563 // presence of f32 array types, since otherwise we never run
6564 // out of FPRs before running out of GPRs.
6565 SDValue ArgVal;
6566
6567 // Double values are always passed in a single GPR.
6568 if (Arg.getValueType() != MVT::f32) {
6569 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6570
6571 // Non-array float values are extended and passed in a GPR.
6572 } else if (!Flags.isInConsecutiveRegs()) {
6573 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6574 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6575
6576 // If we have an array of floats, we collect every odd element
6577 // together with its predecessor into one GPR.
6578 } else if (ArgOffset % PtrByteSize != 0) {
6579 SDValue Lo, Hi;
6580 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6581 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6582 if (!isLittleEndian)
6583 std::swap(Lo, Hi);
6584 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6585
6586 // The final element, if even, goes into the first half of a GPR.
6587 } else if (Flags.isInConsecutiveRegsLast()) {
6588 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6589 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6590 if (!isLittleEndian)
6591 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6592 DAG.getConstant(32, dl, MVT::i32));
6593
6594 // Non-final even elements are skipped; they will be handled
6595 // together with the subsequent argument on the next go-around.
6596 } else
6597 ArgVal = SDValue();
6598
6599 if (ArgVal.getNode())
6600 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6601 } else {
6602 if (IsFastCall)
6603 ComputePtrOff();
6604
6605 // Single-precision floating-point values are mapped to the
6606 // second (rightmost) word of the stack doubleword.
6607 if (Arg.getValueType() == MVT::f32 &&
6608 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6609 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6610 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6611 }
6612
6613 assert(HasParameterArea &&
6614 "Parameter area must exist to pass an argument in memory.");
6615 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6616 true, CFlags.IsTailCall, false, MemOpChains,
6617 TailCallArguments, dl);
6618
6619 NeededLoad = true;
6620 }
6621 // When passing an array of floats, the array occupies consecutive
6622 // space in the argument area; only round up to the next doubleword
6623 // at the end of the array. Otherwise, each float takes 8 bytes.
6624 if (!IsFastCall || NeededLoad) {
6625 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6626 Flags.isInConsecutiveRegs()) ? 4 : 8;
6627 if (Flags.isInConsecutiveRegsLast())
6628 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6629 }
6630 break;
6631 }
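// Sketch of the f32-array packing above: with the FPRs exhausted on a
// big-endian target, two consecutive floats of an ELFv2 homogeneous
// aggregate (say f[0] and f[1]) are each bitcast to i32 and combined by
// BUILD_PAIR into one i64 that travels in a single GPR.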
6632 case MVT::v4f32:
6633 case MVT::v4i32:
6634 case MVT::v8i16:
6635 case MVT::v16i8:
6636 case MVT::v2f64:
6637 case MVT::v2i64:
6638 case MVT::v1i128:
6639 case MVT::f128:
6640 // These can be scalar arguments or elements of a vector array type
6641 // passed directly. The latter are used to implement ELFv2 homogenous
6642 // vector aggregates.
6643
6644 // For a varargs call, named arguments go into VRs or on the stack as
6645 // usual; unnamed arguments always go to the stack or the corresponding
6646 // GPRs when within range. For now, we always put the value in both
6647 // locations (or even all three).
6648 if (CFlags.IsVarArg) {
6649 assert(HasParameterArea &&
6650 "Parameter area must exist if we have a varargs call.");
6651 // We could elide this store in the case where the object fits
6652 // entirely in R registers. Maybe later.
6653 SDValue Store =
6654 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6655 MemOpChains.push_back(Store);
6656 if (VR_idx != NumVRs) {
6657 SDValue Load =
6658 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6659 MemOpChains.push_back(Load.getValue(1));
6660 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6661 }
6662 ArgOffset += 16;
6663 for (unsigned i=0; i<16; i+=PtrByteSize) {
6664 if (GPR_idx == NumGPRs)
6665 break;
6666 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6667 DAG.getConstant(i, dl, PtrVT));
6668 SDValue Load =
6669 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6670 MemOpChains.push_back(Load.getValue(1));
6671 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6672 }
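            // E.g. on 64-bit with GPRs still free, the 16-byte vector
            // stored above is reloaded here as two i64 pieces so the value
            // also travels in GPRs for the callee's va_arg traversal.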
6673 break;
6674 }
6675
6676 // Non-varargs Altivec params go into VRs or on the stack.
6677 if (VR_idx != NumVRs) {
6678 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6679 } else {
6680 if (IsFastCall)
6681 ComputePtrOff();
6682
6683 assert(HasParameterArea &&
6684 "Parameter area must exist to pass an argument in memory.");
6685 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6686 true, CFlags.IsTailCall, true, MemOpChains,
6687 TailCallArguments, dl);
6688 if (IsFastCall)
6689 ArgOffset += 16;
6690 }
6691
6692 if (!IsFastCall)
6693 ArgOffset += 16;
6694 break;
6695 }
6696 }
6697
6698 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6699 "mismatch in size of parameter area");
6700 (void)NumBytesActuallyUsed;
6701
6702 if (!MemOpChains.empty())
6703 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6704
6705 // Check if this is an indirect call (MTCTR/BCTRL).
6706 // See prepareDescriptorIndirectCall and buildCallOperands for more
6707 // information about calls through function pointers in the 64-bit SVR4 ABI.
6708 if (CFlags.IsIndirect) {
6709 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6710 // caller in the TOC save area.
6711 if (isTOCSaveRestoreRequired(Subtarget)) {
6712       assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6713 // Load r2 into a virtual register and store it to the TOC save area.
6714 setUsesTOCBasePtr(DAG);
6715 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6716 // TOC save area offset.
6717 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6718 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6719 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6720 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6721                            MachinePointerInfo::getStack(
6722                                DAG.getMachineFunction(), TOCSaveOffset));
6723 }
6724 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6725 // This does not mean the MTCTR instruction must use R12; it's easier
6726 // to model this as an extra parameter, so do that.
6727 if (isELFv2ABI && !CFlags.IsPatchPoint)
6728 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6729 }
6730
6731 // Build a sequence of copy-to-reg nodes chained together with token chain
6732 // and flag operands which copy the outgoing args into the appropriate regs.
6733 SDValue InGlue;
6734 for (const auto &[Reg, N] : RegsToPass) {
6735 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6736 InGlue = Chain.getValue(1);
6737 }
6738
6739 if (CFlags.IsTailCall && !IsSibCall)
6740 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6741 TailCallArguments);
6742
6743 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6744 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6745}
6746
6747// Returns true when the shadow of a general purpose argument register
6748// in the parameter save area is aligned to at least 'RequiredAlign'.
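// For example, assuming the standard AIX linkage areas (24 bytes on 32-bit,
// 48 bytes on 64-bit): R5 shadows PSA offset 32 and X3 offset 48, both
// 16-byte aligned; R3 (offset 24) and X4 (offset 56) are only 8-byte
// aligned; R4 (offset 28) is only 4-byte aligned.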
6749static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6750 assert(RequiredAlign.value() <= 16 &&
6751 "Required alignment greater than stack alignment.");
6752 switch (Reg) {
6753 default:
6754 report_fatal_error("called on invalid register.");
6755 case PPC::R5:
6756 case PPC::R9:
6757 case PPC::X3:
6758 case PPC::X5:
6759 case PPC::X7:
6760 case PPC::X9:
6761     // These registers are 16 byte aligned, which is the strictest alignment
6762 // we can support.
6763 return true;
6764 case PPC::R3:
6765 case PPC::R7:
6766 case PPC::X4:
6767 case PPC::X6:
6768 case PPC::X8:
6769 case PPC::X10:
6770 // The shadow of these registers in the PSA is 8 byte aligned.
6771 return RequiredAlign <= 8;
6772 case PPC::R4:
6773 case PPC::R6:
6774 case PPC::R8:
6775 case PPC::R10:
6776 return RequiredAlign <= 4;
6777 }
6778}
6779
6780static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6781 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6782 Type *OrigTy, CCState &State) {
6783 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6784 State.getMachineFunction().getSubtarget());
6785 const bool IsPPC64 = Subtarget.isPPC64();
6786 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6787 const Align PtrAlign(PtrSize);
6788 const Align StackAlign(16);
6789 const MVT RegVT = Subtarget.getScalarIntVT();
6790
6791 if (ValVT == MVT::f128)
6792 report_fatal_error("f128 is unimplemented on AIX.");
6793
6794 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6795 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6796 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6797 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6798 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6799 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6800
6801 static const MCPhysReg VR[] = {// Vector registers.
6802 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6803 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6804 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6805
6806 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6807
6808 if (ArgFlags.isNest()) {
6809 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6810 if (!EnvReg)
6811       report_fatal_error("More than one nest argument.");
6812 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6813 return false;
6814 }
6815
6816 if (ArgFlags.isByVal()) {
6817 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6818 if (ByValAlign > StackAlign)
6819 report_fatal_error("Pass-by-value arguments with alignment greater than "
6820 "16 are not supported.");
6821
6822 const unsigned ByValSize = ArgFlags.getByValSize();
6823 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6824
6825 // An empty aggregate parameter takes up no storage and no registers,
6826 // but needs a MemLoc for a stack slot for the formal arguments side.
6827 if (ByValSize == 0) {
6828       State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6829                                        State.getStackSize(), RegVT, LocInfo));
6830 return false;
6831 }
6832
6833 // Shadow allocate any registers that are not properly aligned.
6834 unsigned NextReg = State.getFirstUnallocated(GPRs);
6835 while (NextReg != GPRs.size() &&
6836 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6837       // Shadow allocate the next register since its alignment is not strict enough.
6838 MCRegister Reg = State.AllocateReg(GPRs);
6839 // Allocate the stack space shadowed by said register.
6840 State.AllocateStack(PtrSize, PtrAlign);
6841       assert(Reg && "Allocating register unexpectedly failed.");
6842 (void)Reg;
6843 NextReg = State.getFirstUnallocated(GPRs);
6844 }
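    // E.g. (hypothetical): a 16-byte-aligned by-val arriving when R4 is the
    // next free GPR burns R4 and its 4-byte PSA shadow, so the object can
    // start at the 16-byte-aligned shadow of R5.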
6845
6846 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6847 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6848 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6849 if (MCRegister Reg = State.AllocateReg(GPRs))
6850 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6851 else {
6852         State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6853                                          Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6854                                          LocInfo));
6855 break;
6856 }
6857 }
6858 return false;
6859 }
6860
6861 // Arguments always reserve parameter save area.
6862 switch (ValVT.SimpleTy) {
6863 default:
6864 report_fatal_error("Unhandled value type for argument.");
6865 case MVT::i64:
6866 // i64 arguments should have been split to i32 for PPC32.
6867 assert(IsPPC64 && "PPC32 should have split i64 values.");
6868 [[fallthrough]];
6869 case MVT::i1:
6870 case MVT::i32: {
6871 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6872 // AIX integer arguments are always passed in register width.
6873 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6874 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6875                                   : CCValAssign::LocInfo::ZExt;
6876     if (MCRegister Reg = State.AllocateReg(GPRs))
6877 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6878 else
6879 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6880
6881 return false;
6882 }
6883 case MVT::f32:
6884 case MVT::f64: {
6885 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6886 const unsigned StoreSize = LocVT.getStoreSize();
6887 // Floats are always 4-byte aligned in the PSA on AIX.
6888 // This includes f64 in 64-bit mode for ABI compatibility.
6889 const unsigned Offset =
6890 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6891 MCRegister FReg = State.AllocateReg(FPR);
6892 if (FReg)
6893 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6894
6895 // Reserve and initialize GPRs or initialize the PSA as required.
6896 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6897 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6898 assert(FReg && "An FPR should be available when a GPR is reserved.");
6899 if (State.isVarArg()) {
6900 // Successfully reserved GPRs are only initialized for vararg calls.
6901 // Custom handling is required for:
6902 // f64 in PPC32 needs to be split into 2 GPRs.
6903 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6904 State.addLoc(
6905 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6906 }
6907 } else {
6908 // If there are insufficient GPRs, the PSA needs to be initialized.
6909 // Initialization occurs even if an FPR was initialized for
6910 // compatibility with the AIX XL compiler. The full memory for the
6911 // argument will be initialized even if a prior word is saved in GPR.
6912 // A custom memLoc is used when the argument also passes in FPR so
6913 // that the callee handling can skip over it easily.
6914 State.addLoc(
6915 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6916 LocInfo)
6917 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6918 break;
6919 }
6920 }
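      // E.g. for a vararg f64 on PPC32 (StoreSize 8, PtrSize 4) this loop
      // runs twice and emits two custom RegLocs; the actual split of the
      // value into two GPRs happens later, in LowerCall_AIX.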
6921
6922 return false;
6923 }
6924 case MVT::v4f32:
6925 case MVT::v4i32:
6926 case MVT::v8i16:
6927 case MVT::v16i8:
6928 case MVT::v2i64:
6929 case MVT::v2f64:
6930 case MVT::v1i128: {
6931 const unsigned VecSize = 16;
6932 const Align VecAlign(VecSize);
6933
6934 if (!State.isVarArg()) {
6935 // If there are vector registers remaining we don't consume any stack
6936 // space.
6937 if (MCRegister VReg = State.AllocateReg(VR)) {
6938 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6939 return false;
6940 }
6941 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6942 // might be allocated in the portion of the PSA that is shadowed by the
6943 // GPRs.
6944 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6945 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6946 return false;
6947 }
6948
6949 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6950 // Burn any underaligned registers and their shadowed stack space until
6951 // we reach the required alignment.
6952 while (NextRegIndex != GPRs.size() &&
6953 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6954 // Shadow allocate register and its stack shadow.
6955 MCRegister Reg = State.AllocateReg(GPRs);
6956 State.AllocateStack(PtrSize, PtrAlign);
6957 assert(Reg && "Allocating register unexpectedly failed.");
6958 (void)Reg;
6959 NextRegIndex = State.getFirstUnallocated(GPRs);
6960 }
6961
6962 // Vectors that are passed as fixed arguments are handled differently.
6963 // They are passed in VRs if any are available (unlike arguments passed
6964 // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6965     // functions).
6966 if (!ArgFlags.isVarArg()) {
6967 if (MCRegister VReg = State.AllocateReg(VR)) {
6968 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6969 // Shadow allocate GPRs and stack space even though we pass in a VR.
6970 for (unsigned I = 0; I != VecSize; I += PtrSize)
6971 State.AllocateReg(GPRs);
6972 State.AllocateStack(VecSize, VecAlign);
6973 return false;
6974 }
6975 // No vector registers remain so pass on the stack.
6976 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6977 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6978 return false;
6979 }
6980
6981     // If all GPRs are consumed then we pass the argument fully on the stack.
6982 if (NextRegIndex == GPRs.size()) {
6983 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6984 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6985 return false;
6986 }
6987
6988 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6989 // half of the argument, and then need to pass the remaining half on the
6990 // stack.
6991 if (GPRs[NextRegIndex] == PPC::R9) {
6992 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6993 State.addLoc(
6994 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6995
6996 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
6997 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
6998 assert(FirstReg && SecondReg &&
6999 "Allocating R9 or R10 unexpectedly failed.");
7000 State.addLoc(
7001 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7002 State.addLoc(
7003 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7004 return false;
7005 }
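    // E.g. when R9 is the first free GPR, its PSA shadow is 16-byte
    // aligned, so the first 8 bytes of the vector travel in R9/R10 and the
    // rest is only available from the 16-byte stack slot allocated here.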
7006
7007 // We have enough GPRs to fully pass the vector argument, and we have
7008 // already consumed any underaligned registers. Start with the custom
7009 // MemLoc and then the custom RegLocs.
7010 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7011 State.addLoc(
7012 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7013 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7014 const MCRegister Reg = State.AllocateReg(GPRs);
7015       assert(Reg && "Failed to allocate register for vararg vector argument");
7016 State.addLoc(
7017 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7018 }
7019 return false;
7020 }
7021 }
7022 return true;
7023}
7024
7025 // So far, this function is only used by LowerFormalArguments_AIX().
7026 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7027                                                     bool IsPPC64,
7028 bool HasP8Vector,
7029 bool HasVSX) {
7030 assert((IsPPC64 || SVT != MVT::i64) &&
7031 "i64 should have been split for 32-bit codegen.");
7032
7033 switch (SVT) {
7034 default:
7035 report_fatal_error("Unexpected value type for formal argument");
7036 case MVT::i1:
7037 case MVT::i32:
7038 case MVT::i64:
7039 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7040 case MVT::f32:
7041 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7042 case MVT::f64:
7043 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7044 case MVT::v4f32:
7045 case MVT::v4i32:
7046 case MVT::v8i16:
7047 case MVT::v16i8:
7048 case MVT::v2i64:
7049 case MVT::v2f64:
7050 case MVT::v1i128:
7051 return &PPC::VRRCRegClass;
7052 }
7053}
7054
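// Re-asserts the extension the ABI promises for a narrow integer argument
// and truncates it to the declared type; e.g. a sign-extended i8 arriving
// in an i32 location becomes (trunc (AssertSext i32-val, i8)).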
7055 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7056                                         SelectionDAG &DAG, SDValue ArgValue,
7057 MVT LocVT, const SDLoc &dl) {
7058 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7059 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7060
7061 if (Flags.isSExt())
7062 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7063 DAG.getValueType(ValVT));
7064 else if (Flags.isZExt())
7065 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7066 DAG.getValueType(ValVT));
7067
7068 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7069}
7070
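// For example, assuming the usual linkage sizes: on 32-bit (24-byte linkage
// area) R5 maps to 24 + 4 * 2 = 32; on 64-bit (48-byte linkage area) X10
// maps to 48 + 8 * 7 = 104.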
7071static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7072 const unsigned LASize = FL->getLinkageSize();
7073
7074 if (PPC::GPRCRegClass.contains(Reg)) {
7075 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7076 "Reg must be a valid argument register!");
7077 return LASize + 4 * (Reg - PPC::R3);
7078 }
7079
7080 if (PPC::G8RCRegClass.contains(Reg)) {
7081 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7082 "Reg must be a valid argument register!");
7083 return LASize + 8 * (Reg - PPC::X3);
7084 }
7085
7086 llvm_unreachable("Only general purpose registers expected.");
7087}
7088
7089// AIX ABI Stack Frame Layout:
7090//
7091// Low Memory +--------------------------------------------+
7092// SP +---> | Back chain | ---+
7093// | +--------------------------------------------+ |
7094// | | Saved Condition Register | |
7095// | +--------------------------------------------+ |
7096// | | Saved Linkage Register | |
7097// | +--------------------------------------------+ | Linkage Area
7098// | | Reserved for compilers | |
7099// | +--------------------------------------------+ |
7100// | | Reserved for binders | |
7101// | +--------------------------------------------+ |
7102// | | Saved TOC pointer | ---+
7103// | +--------------------------------------------+
7104// | | Parameter save area |
7105// | +--------------------------------------------+
7106// | | Alloca space |
7107// | +--------------------------------------------+
7108// | | Local variable space |
7109// | +--------------------------------------------+
7110// | | Float/int conversion temporary |
7111// | +--------------------------------------------+
7112// | | Save area for AltiVec registers |
7113// | +--------------------------------------------+
7114// | | AltiVec alignment padding |
7115// | +--------------------------------------------+
7116// | | Save area for VRSAVE register |
7117// | +--------------------------------------------+
7118// | | Save area for General Purpose registers |
7119// | +--------------------------------------------+
7120// | | Save area for Floating Point registers |
7121// | +--------------------------------------------+
7122// +---- | Back chain |
7123// High Memory +--------------------------------------------+
7124//
7125// Specifications:
7126// AIX 7.2 Assembler Language Reference
7127// Subroutine linkage convention
7128
7129SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7130 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7131 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7132 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7133
7134 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7135 CallConv == CallingConv::Fast) &&
7136 "Unexpected calling convention!");
7137
7138 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7139 report_fatal_error("Tail call support is unimplemented on AIX.");
7140
7141 if (useSoftFloat())
7142 report_fatal_error("Soft float support is unimplemented on AIX.");
7143
7144 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7145
7146 const bool IsPPC64 = Subtarget.isPPC64();
7147 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7148
7149 // Assign locations to all of the incoming arguments.
7150   SmallVector<CCValAssign, 16> ArgLocs;
7151   MachineFunction &MF = DAG.getMachineFunction();
7152 MachineFrameInfo &MFI = MF.getFrameInfo();
7153 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7154 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7155
7156 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7157 // Reserve space for the linkage area on the stack.
7158 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7159 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7160 uint64_t SaveStackPos = CCInfo.getStackSize();
7161 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7162 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7163
7164   SmallVector<SDValue, 8> MemOps;
7165
7166 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7167 CCValAssign &VA = ArgLocs[I++];
7168 MVT LocVT = VA.getLocVT();
7169 MVT ValVT = VA.getValVT();
7170 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7171
7172 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7173 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7174 // For compatibility with the AIX XL compiler, the float args in the
7175 // parameter save area are initialized even if the argument is available
7176     // in a register. The caller is required to initialize both the register
7177     // and memory; however, the callee can choose to expect it in either.
7178 // The memloc is dismissed here because the argument is retrieved from
7179 // the register.
7180 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7181 continue;
7182
7183 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7184 const TargetRegisterClass *RegClass = getRegClassForSVT(
7185 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7186       // On PPC64, the debugger assumes extended 8-byte values are stored from GPR.
7187 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7188 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7189 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7190 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7191 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7192 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7193 MachinePointerInfo(), Align(PtrByteSize));
7194 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7195 MemOps.push_back(StoreReg);
7196 }
7197
7198 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7199 unsigned StoreSize =
7200 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7201 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7202 }
7203
7204 auto HandleMemLoc = [&]() {
7205 const unsigned LocSize = LocVT.getStoreSize();
7206 const unsigned ValSize = ValVT.getStoreSize();
7207 assert((ValSize <= LocSize) &&
7208 "Object size is larger than size of MemLoc");
7209 int CurArgOffset = VA.getLocMemOffset();
7210 // Objects are right-justified because AIX is big-endian.
7211 if (LocSize > ValSize)
7212 CurArgOffset += LocSize - ValSize;
7213 // Potential tail calls could cause overwriting of argument stack slots.
7214 const bool IsImmutable =
7215           !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7216             (CallConv == CallingConv::Fast));
7217 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7218 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7219 SDValue ArgValue =
7220 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7221
7222 // While the ABI specifies the argument type is (sign or zero) extended
7223 // out to register width, not all code is compliant. We truncate and
7224 // re-extend to be more forgiving of these callers when the argument type
7225 // is smaller than register width.
7226 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7227 ValVT.isInteger() &&
7228 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7229 // It is possible to have either real integer values
7230 // or integers that were not originally integers.
7231       // In the latter case, these could have come from structs,
7232 // and these integers would not have an extend on the parameter.
7233 // Since these types of integers do not have an extend specified
7234 // in the first place, the type of extend that we do should not matter.
7235 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7236 ? MVT::i8
7237 : ArgVT;
7238 SDValue ArgValueTrunc =
7239 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7240 SDValue ArgValueExt =
7241 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7242 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7243 InVals.push_back(ArgValueExt);
7244 } else {
7245 InVals.push_back(ArgValue);
7246 }
7247 };
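      // E.g. on big-endian AIX an i32 argument in an 8-byte MemLoc is
      // right-justified, so the load above reads from LocMemOffset + 4;
      // a same-size location is read from LocMemOffset unchanged.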
7248
7249 // Vector arguments to VaArg functions are passed both on the stack, and
7250 // in any available GPRs. Load the value from the stack and add the GPRs
7251 // as live ins.
7252 if (VA.isMemLoc() && VA.needsCustom()) {
7253 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7254 assert(isVarArg && "Only use custom memloc for vararg.");
7255       // Grab the ValNo of the custom MemLoc so we can compare it to the
7256       // ValNo of the matching custom RegLocs.
7257 const unsigned OriginalValNo = VA.getValNo();
7258 (void)OriginalValNo;
7259
7260 auto HandleCustomVecRegLoc = [&]() {
7261 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7262 "Missing custom RegLoc.");
7263 VA = ArgLocs[I++];
7264 assert(VA.getValVT().isVector() &&
7265 "Unexpected Val type for custom RegLoc.");
7266 assert(VA.getValNo() == OriginalValNo &&
7267 "ValNo mismatch between custom MemLoc and RegLoc.");
7268         MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7269         MF.addLiveIn(VA.getLocReg(),
7270 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7271 Subtarget.hasVSX()));
7272 };
7273
7274 HandleMemLoc();
7275       // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7276       // in 32-bit there will be 2 custom RegLocs if we are passing in R9
7277       // and R10.
7278 HandleCustomVecRegLoc();
7279 HandleCustomVecRegLoc();
7280
7281 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7282 // we passed the vector in R5, R6, R7 and R8.
7283 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7284 assert(!IsPPC64 &&
7285 "Only 2 custom RegLocs expected for 64-bit codegen.");
7286 HandleCustomVecRegLoc();
7287 HandleCustomVecRegLoc();
7288 }
7289
7290 continue;
7291 }
7292
7293 if (VA.isRegLoc()) {
7294 if (VA.getValVT().isScalarInteger())
7295         FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7296       else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7297 switch (VA.getValVT().SimpleTy) {
7298 default:
7299 report_fatal_error("Unhandled value type for argument.");
7300 case MVT::f32:
7301           FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7302           break;
7303 case MVT::f64:
7304           FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7305           break;
7306 }
7307 } else if (VA.getValVT().isVector()) {
7308 switch (VA.getValVT().SimpleTy) {
7309 default:
7310 report_fatal_error("Unhandled value type for argument.");
7311 case MVT::v16i8:
7312           FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7313           break;
7314 case MVT::v8i16:
7315           FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7316           break;
7317 case MVT::v4i32:
7318 case MVT::v2i64:
7319 case MVT::v1i128:
7320           FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7321           break;
7322 case MVT::v4f32:
7323 case MVT::v2f64:
7324           FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7325           break;
7326 }
7327 }
7328 }
7329
7330 if (Flags.isByVal() && VA.isMemLoc()) {
7331 const unsigned Size =
7332 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7333 PtrByteSize);
7334 const int FI = MF.getFrameInfo().CreateFixedObject(
7335 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7336 /* IsAliased */ true);
7337 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7338 InVals.push_back(FIN);
7339
7340 continue;
7341 }
7342
7343 if (Flags.isByVal()) {
7344 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7345
7346 const MCPhysReg ArgReg = VA.getLocReg();
7347 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7348
7349 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7350 const int FI = MF.getFrameInfo().CreateFixedObject(
7351 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7352 /* IsAliased */ true);
7353 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7354 InVals.push_back(FIN);
7355
7356 // Add live ins for all the RegLocs for the same ByVal.
7357 const TargetRegisterClass *RegClass =
7358 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7359
7360 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7361 unsigned Offset) {
7362 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7363 // Since the callers side has left justified the aggregate in the
7364 // register, we can simply store the entire register into the stack
7365 // slot.
7366 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7367           // The store to the fixedstack object is needed because accessing a
7368           // field of the ByVal will use a gep and load. Ideally we will optimize
7369           // to extracting the value from the register directly, and elide the
7370           // stores when the argument's address is not taken, but that will need
7371           // to be future work.
7372 SDValue Store = DAG.getStore(
7373 CopyFrom.getValue(1), dl, CopyFrom,
7374             DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7375             MachinePointerInfo::getFixedStack(MF, FI, Offset));
7376
7377 MemOps.push_back(Store);
7378 };
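      // E.g. a 12-byte by-val arriving in X3/X4: both full registers are
      // stored to the fixed stack object at offsets 0 and 8, and the callee
      // then accesses the aggregate through the frame index as memory.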
7379
7380 unsigned Offset = 0;
7381 HandleRegLoc(VA.getLocReg(), Offset);
7382 Offset += PtrByteSize;
7383 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7384 Offset += PtrByteSize) {
7385 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7386 "RegLocs should be for ByVal argument.");
7387
7388 const CCValAssign RL = ArgLocs[I++];
7389 HandleRegLoc(RL.getLocReg(), Offset);
7391 }
7392
7393 if (Offset != StackSize) {
7394 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7395 "Expected MemLoc for remaining bytes.");
7396 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7397       // Consume the MemLoc. The InVal has already been emitted, so nothing
7398 // more needs to be done.
7399 ++I;
7400 }
7401
7402 continue;
7403 }
7404
7405 if (VA.isRegLoc() && !VA.needsCustom()) {
7406 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7407 Register VReg =
7408 MF.addLiveIn(VA.getLocReg(),
7409 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7410 Subtarget.hasVSX()));
7411 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7412 if (ValVT.isScalarInteger() &&
7413 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7414 ArgValue =
7415 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7416 }
7417 InVals.push_back(ArgValue);
7418 continue;
7419 }
7420 if (VA.isMemLoc()) {
7421 HandleMemLoc();
7422 continue;
7423 }
7424 }
7425
7426 // On AIX a minimum of 8 words is saved to the parameter save area.
7427 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7428 // Area that is at least reserved in the caller of this function.
7429 unsigned CallerReservedArea = std::max<unsigned>(
7430 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7431
7432 // Set the size that is at least reserved in caller of this function. Tail
7433 // call optimized function's reserved stack space needs to be aligned so
7434 // that taking the difference between two stack areas will result in an
7435 // aligned stack.
7436 CallerReservedArea =
7437 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7438 FuncInfo->setMinReservedArea(CallerReservedArea);
7439
7440 if (isVarArg) {
7441 int VAListIndex = 0;
7442 // If any of the optional arguments are passed in register then the fixed
7443 // stack object we spill into is not immutable. Create a fixed stack object
7444 // that overlaps the remainder of the parameter save area.
7445 if (CCInfo.getStackSize() < (LinkageSize + MinParameterSaveArea)) {
7446 unsigned FixedStackSize =
7447 LinkageSize + MinParameterSaveArea - CCInfo.getStackSize();
7448 VAListIndex =
7449 MFI.CreateFixedObject(FixedStackSize, CCInfo.getStackSize(),
7450 /* IsImmutable */ false, /* IsAliased */ true);
7451 } else {
7452 // All the arguments passed through ellipses are on the stack. Create a
7453 // dummy fixed stack object the same size as a pointer since we don't
7454 // know the actual size.
7455 VAListIndex =
7456 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(),
7457 /* IsImmutable */ true, /* IsAliased */ true);
7458 }
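    // E.g. on PPC64 with one named i64 argument: the PSA use is 48 + 8
    // bytes, so the fixed object covers the remaining 56 bytes of the
    // minimum PSA and the loop below spills X4-X10 into it.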
7459
7460 FuncInfo->setVarArgsFrameIndex(VAListIndex);
7461 SDValue FIN = DAG.getFrameIndex(VAListIndex, PtrVT);
7462
7463 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7464 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7465
7466 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7467 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7468 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7469
7470 // The fixed integer arguments of a variadic function are stored to the
7471 // VarArgsFrameIndex on the stack so that they may be loaded by
7472 // dereferencing the result of va_next.
7473 for (unsigned
7474 GPRIndex = (CCInfo.getStackSize() - LinkageSize) / PtrByteSize,
7475 Offset = 0;
7476 GPRIndex < NumGPArgRegs; ++GPRIndex, Offset += PtrByteSize) {
7477
7478 const Register VReg =
7479 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7480 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7481
7482 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7483 MachinePointerInfo MPI =
7484 MachinePointerInfo::getFixedStack(MF, VAListIndex, Offset);
7485 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MPI);
7486 MemOps.push_back(Store);
7487 // Increment the address for the next argument to store.
7488 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7489 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7490 }
7491 }
7492
7493 if (!MemOps.empty())
7494 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7495
7496 return Chain;
7497}
7498
7499SDValue PPCTargetLowering::LowerCall_AIX(
7500 SDValue Chain, SDValue Callee, CallFlags CFlags,
7501     const SmallVectorImpl<ISD::OutputArg> &Outs,
7502     const SmallVectorImpl<SDValue> &OutVals,
7503 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7504     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7505     const CallBase *CB) const {
7506 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7507 // AIX ABI stack frame layout.
7508
7509 assert((CFlags.CallConv == CallingConv::C ||
7510 CFlags.CallConv == CallingConv::Cold ||
7511 CFlags.CallConv == CallingConv::Fast) &&
7512 "Unexpected calling convention!");
7513
7514 if (CFlags.IsPatchPoint)
7515 report_fatal_error("This call type is unimplemented on AIX.");
7516
7517 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7518
7519 MachineFunction &MF = DAG.getMachineFunction();
7520   SmallVector<CCValAssign, 16> ArgLocs;
7521   CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7522 *DAG.getContext());
7523
7524 // Reserve space for the linkage save area (LSA) on the stack.
7525 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7526 // [SP][CR][LR][2 x reserved][TOC].
7527 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7528 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7529 const bool IsPPC64 = Subtarget.isPPC64();
7530 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7531 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7532 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7533 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7534
7535 // The prolog code of the callee may store up to 8 GPR argument registers to
7536 // the stack, allowing va_start to index over them in memory if the callee
7537 // is variadic.
7538 // Because we cannot tell if this is needed on the caller side, we have to
7539 // conservatively assume that it is needed. As such, make sure we have at
7540 // least enough stack space for the caller to store the 8 GPRs.
7541 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7542 const unsigned NumBytes = std::max<unsigned>(
7543 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7544
7545 // Adjust the stack pointer for the new arguments...
7546 // These operations are automatically eliminated by the prolog/epilog pass.
7547 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7548 SDValue CallSeqStart = Chain;
7549
7550   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7551   SmallVector<SDValue, 8> MemOpChains;
7552
7553 // Set up a copy of the stack pointer for loading and storing any
7554 // arguments that may not fit in the registers available for argument
7555 // passing.
7556 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7557 : DAG.getRegister(PPC::R1, MVT::i32);
7558
7559 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7560 const unsigned ValNo = ArgLocs[I].getValNo();
7561 SDValue Arg = OutVals[ValNo];
7562 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7563
7564 if (Flags.isByVal()) {
7565 const unsigned ByValSize = Flags.getByValSize();
7566
7567 // Nothing to do for zero-sized ByVals on the caller side.
7568 if (!ByValSize) {
7569 ++I;
7570 continue;
7571 }
7572
7573 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7574 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7575 (LoadOffset != 0)
7576 ? DAG.getObjectPtrOffset(
7577 dl, Arg, TypeSize::getFixed(LoadOffset))
7578 : Arg,
7579 MachinePointerInfo(), VT);
7580 };
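      // E.g. a 20-byte by-val on PPC64: the loop below emits two 8-byte
      // zero-extended loads for the first two GPRs, leaving a 4-byte
      // residue for the left-justified handling further down.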
7581
7582 unsigned LoadOffset = 0;
7583
7584       // Initialize the registers that are fully occupied by the by-val argument.
7585 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7586 SDValue Load = GetLoad(PtrVT, LoadOffset);
7587 MemOpChains.push_back(Load.getValue(1));
7588 LoadOffset += PtrByteSize;
7589 const CCValAssign &ByValVA = ArgLocs[I++];
7590 assert(ByValVA.getValNo() == ValNo &&
7591 "Unexpected location for pass-by-value argument.");
7592 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7593 }
7594
7595 if (LoadOffset == ByValSize)
7596 continue;
7597
7598 // There must be one more loc to handle the remainder.
7599 assert(ArgLocs[I].getValNo() == ValNo &&
7600 "Expected additional location for by-value argument.");
7601
7602 if (ArgLocs[I].isMemLoc()) {
7603 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7604 const CCValAssign &ByValVA = ArgLocs[I++];
7605 ISD::ArgFlagsTy MemcpyFlags = Flags;
7606 // Only memcpy the bytes that don't pass in register.
7607 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7608 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7609 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7610 dl, Arg, TypeSize::getFixed(LoadOffset))
7611 : Arg,
7612             DAG.getObjectPtrOffset(
7613                 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7614 CallSeqStart, MemcpyFlags, DAG, dl);
7615 continue;
7616 }
7617
7618 // Initialize the final register residue.
7619 // Any residue that occupies the final by-val arg register must be
7620 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7621 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7622 // 2 and 1 byte loads.
7623 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7624 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7625 "Unexpected register residue for by-value argument.");
7626 SDValue ResidueVal;
7627 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7628 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7629 const MVT VT =
7630 N == 1 ? MVT::i8
7631 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7632 SDValue Load = GetLoad(VT, LoadOffset);
7633 MemOpChains.push_back(Load.getValue(1));
7634 LoadOffset += N;
7635 Bytes += N;
7636
7637       // By-val arguments are passed left-justified in register.
7638 // Every load here needs to be shifted, otherwise a full register load
7639 // should have been used.
7640 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7641 "Unexpected load emitted during handling of pass-by-value "
7642 "argument.");
7643 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7644 EVT ShiftAmountTy =
7645 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7646 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7647 SDValue ShiftedLoad =
7648 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7649 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7650 ShiftedLoad)
7651 : ShiftedLoad;
7652 }
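      // For the 7-byte example above on PPC64, the i32 load is shifted
      // left by 32 bits, the i16 by 16 and the i8 by 8, so the OR of the
      // three pieces is the residue left-justified in the register.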
7653
7654 const CCValAssign &ByValVA = ArgLocs[I++];
7655 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7656 continue;
7657 }
7658
7659 CCValAssign &VA = ArgLocs[I++];
7660 const MVT LocVT = VA.getLocVT();
7661 const MVT ValVT = VA.getValVT();
7662
7663 switch (VA.getLocInfo()) {
7664 default:
7665 report_fatal_error("Unexpected argument extension type.");
7666 case CCValAssign::Full:
7667 break;
7668 case CCValAssign::ZExt:
7669 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7670 break;
7671 case CCValAssign::SExt:
7672 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7673 break;
7674 }
7675
7676 if (VA.isRegLoc() && !VA.needsCustom()) {
7677 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7678 continue;
7679 }
7680
7681 // Vector arguments passed to VarArg functions need custom handling when
7682 // they are passed (at least partially) in GPRs.
7683 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7684 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7685 // Store value to its stack slot.
7686 SDValue PtrOff =
7687 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7688 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7689 SDValue Store =
7690 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7691 MemOpChains.push_back(Store);
7692 const unsigned OriginalValNo = VA.getValNo();
7693 // Then load the GPRs from the stack
7694 unsigned LoadOffset = 0;
7695 auto HandleCustomVecRegLoc = [&]() {
7696 assert(I != E && "Unexpected end of CCvalAssigns.");
7697 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7698 "Expected custom RegLoc.");
7699 CCValAssign RegVA = ArgLocs[I++];
7700 assert(RegVA.getValNo() == OriginalValNo &&
7701 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7702 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7703 DAG.getConstant(LoadOffset, dl, PtrVT));
7704 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7705 MemOpChains.push_back(Load.getValue(1));
7706 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7707 LoadOffset += PtrByteSize;
7708 };
7709
7710       // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7711       // in 32-bit there will be 2 custom RegLocs if we are passing in R9
7712       // and R10.
7713 HandleCustomVecRegLoc();
7714 HandleCustomVecRegLoc();
7715
7716 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7717 ArgLocs[I].getValNo() == OriginalValNo) {
7718 assert(!IsPPC64 &&
7719 "Only 2 custom RegLocs expected for 64-bit codegen.");
7720 HandleCustomVecRegLoc();
7721 HandleCustomVecRegLoc();
7722 }
7723
7724 continue;
7725 }
7726
7727 if (VA.isMemLoc()) {
7728 SDValue PtrOff =
7729 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7730 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7731 MemOpChains.push_back(
7732 DAG.getStore(Chain, dl, Arg, PtrOff,
7733                        MachinePointerInfo::getStack(MF, VA.getLocMemOffset()),
7734                        Subtarget.getFrameLowering()->getStackAlign()));
7735
7736 continue;
7737 }
7738
7739 if (!ValVT.isFloatingPoint())
7740       report_fatal_error(
7741           "Unexpected register handling for calling convention.");
7742
7743 // Custom handling is used for GPR initializations for vararg float
7744 // arguments.
7745 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7746 LocVT.isInteger() &&
7747 "Custom register handling only expected for VarArg.");
7748
7749 SDValue ArgAsInt =
7750 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7751
7752 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7753 // f32 in 32-bit GPR
7754 // f64 in 64-bit GPR
7755 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7756 else if (Arg.getValueType().getFixedSizeInBits() <
7757 LocVT.getFixedSizeInBits())
7758 // f32 in 64-bit GPR.
7759 RegsToPass.push_back(std::make_pair(
7760 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7761 else {
7762 // f64 in two 32-bit GPRs
7763 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7764 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7765 "Unexpected custom register for argument!");
7766 CCValAssign &GPR1 = VA;
7767 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7768 DAG.getConstant(32, dl, MVT::i8));
7769 RegsToPass.push_back(std::make_pair(
7770 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7771
7772 if (I != E) {
7773 // If only 1 GPR was available, there will only be one custom GPR and
7774 // the argument will also pass in memory.
7775 CCValAssign &PeekArg = ArgLocs[I];
7776         if (PeekArg.isRegLoc() && PeekArg.getValNo() == GPR1.getValNo()) {
7777 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7778 CCValAssign &GPR2 = ArgLocs[I++];
7779 RegsToPass.push_back(std::make_pair(
7780 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7781 }
7782 }
7783 }
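    // E.g. a vararg f64 on PPC32 is split here: GPR1 receives the high 32
    // bits of the bit pattern and GPR2 (when available) the low 32 bits.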
7784 }
7785
7786 if (!MemOpChains.empty())
7787 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7788
7789 // For indirect calls, we need to save the TOC base to the stack for
7790 // restoration after the call.
7791 if (CFlags.IsIndirect && !Subtarget.usePointerGlueHelper()) {
7792 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7793 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7794 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7795 const MVT PtrVT = Subtarget.getScalarIntVT();
7796 const unsigned TOCSaveOffset =
7797 Subtarget.getFrameLowering()->getTOCSaveOffset();
7798
7799 setUsesTOCBasePtr(DAG);
7800 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7801 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7802 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7803 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7804 Chain = DAG.getStore(
7805 Val.getValue(1), dl, Val, AddPtr,
7806 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7807 }
7808
7809 // Build a sequence of copy-to-reg nodes chained together with token chain
7810 // and flag operands which copy the outgoing args into the appropriate regs.
7811 SDValue InGlue;
7812 for (auto Reg : RegsToPass) {
7813 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7814 InGlue = Chain.getValue(1);
7815 }
7816
7817 const int SPDiff = 0;
7818 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7819 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7820}
7821
7822bool
7823PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7824 MachineFunction &MF, bool isVarArg,
7825                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
7826                                   LLVMContext &Context,
7827                                   const Type *RetTy) const {
7828   SmallVector<CCValAssign, 16> RVLocs;
7829   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7830 return CCInfo.CheckReturn(
7831 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7832                 ? RetCC_PPC_Cold
7833                 : RetCC_PPC);
7834}
7835
7836SDValue
7837PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7838 bool isVarArg,
7839                                const SmallVectorImpl<ISD::OutputArg> &Outs,
7840                                const SmallVectorImpl<SDValue> &OutVals,
7841 const SDLoc &dl, SelectionDAG &DAG) const {
7842   SmallVector<CCValAssign, 16> RVLocs;
7843   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7844 *DAG.getContext());
7845 CCInfo.AnalyzeReturn(Outs,
7846 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7847                            ? RetCC_PPC_Cold
7848                            : RetCC_PPC);
7849
7850 SDValue Glue;
7851 SmallVector<SDValue, 4> RetOps(1, Chain);
7852
7853 // Copy the result values into the output registers.
7854 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7855 CCValAssign &VA = RVLocs[i];
7856 assert(VA.isRegLoc() && "Can only return in registers!");
7857
7858 SDValue Arg = OutVals[RealResIdx];
7859
7860 switch (VA.getLocInfo()) {
7861 default: llvm_unreachable("Unknown loc info!");
7862 case CCValAssign::Full: break;
7863 case CCValAssign::AExt:
7864 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7865 break;
7866 case CCValAssign::ZExt:
7867 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7868 break;
7869 case CCValAssign::SExt:
7870 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7871 break;
7872 }
7873 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7874 bool isLittleEndian = Subtarget.isLittleEndian();
7875 // Legalize ret f64 -> ret 2 x i32.
7876 SDValue SVal =
7877 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7878 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7879 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7880 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7881 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7882 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7883 Glue = Chain.getValue(1);
7884 VA = RVLocs[++i]; // skip ahead to next loc
7885 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7886 } else
7887 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7888 Glue = Chain.getValue(1);
7889 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7890 }
7891
7892 RetOps[0] = Chain; // Update chain.
7893
7894 // Add the glue if we have it.
7895 if (Glue.getNode())
7896 RetOps.push_back(Glue);
7897
7898 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7899}
7900
7901SDValue
7902PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7903 SelectionDAG &DAG) const {
7904 SDLoc dl(Op);
7905
7906 // Get the correct type for integers.
7907 EVT IntVT = Op.getValueType();
7908
7909 // Get the inputs.
7910 SDValue Chain = Op.getOperand(0);
7911 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7912 // Build a DYNAREAOFFSET node.
7913 SDValue Ops[2] = {Chain, FPSIdx};
7914 SDVTList VTs = DAG.getVTList(IntVT);
7915 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7916}
7917
7918SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7919 SelectionDAG &DAG) const {
7920 // When we pop the dynamic allocation we need to restore the SP link.
7921 SDLoc dl(Op);
7922
7923 // Get the correct type for pointers.
7924 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7925
7926 // Construct the stack pointer operand.
7927 bool isPPC64 = Subtarget.isPPC64();
7928 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7929 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7930
7931 // Get the operands for the STACKRESTORE.
7932 SDValue Chain = Op.getOperand(0);
7933 SDValue SaveSP = Op.getOperand(1);
7934
7935 // Load the old link SP.
7936 SDValue LoadLinkSP =
7937 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7938
7939 // Restore the stack pointer.
7940 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7941
7942 // Store the old link SP.
7943 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7944}
7945
7946SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7947 MachineFunction &MF = DAG.getMachineFunction();
7948 bool isPPC64 = Subtarget.isPPC64();
7949 EVT PtrVT = getPointerTy(MF.getDataLayout());
7950
7951   // Get the current return address save index.
7953 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7954 int RASI = FI->getReturnAddrSaveIndex();
7955
7956   // If the return address save index hasn't been defined yet.
7957 if (!RASI) {
7958     // Find out the fixed offset of the return address save area.
7959 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7960     // Allocate the frame index for the return address save area.
7961 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7962 // Save the result.
7963 FI->setReturnAddrSaveIndex(RASI);
7964 }
7965 return DAG.getFrameIndex(RASI, PtrVT);
7966}
7967
7968SDValue
7969PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7970 MachineFunction &MF = DAG.getMachineFunction();
7971 bool isPPC64 = Subtarget.isPPC64();
7972 EVT PtrVT = getPointerTy(MF.getDataLayout());
7973
7974 // Get current frame pointer save index. The users of this index will be
7975 // primarily DYNALLOC instructions.
7976 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7977 int FPSI = FI->getFramePointerSaveIndex();
7978
7979 // If the frame pointer save index hasn't been defined yet.
7980 if (!FPSI) {
7981     // Find out the fixed offset of the frame pointer save area.
7982 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7983     // Allocate the frame index for the frame pointer save area.
7984 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7985 // Save the result.
7986 FI->setFramePointerSaveIndex(FPSI);
7987 }
7988 return DAG.getFrameIndex(FPSI, PtrVT);
7989}
7990
7991SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7992 SelectionDAG &DAG) const {
7993 MachineFunction &MF = DAG.getMachineFunction();
7994 // Get the inputs.
7995 SDValue Chain = Op.getOperand(0);
7996 SDValue Size = Op.getOperand(1);
7997 SDLoc dl(Op);
7998
7999 // Get the correct type for pointers.
8000 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8001 // Negate the size.
8002 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8003 DAG.getConstant(0, dl, PtrVT), Size);
8004 // Construct a node for the frame pointer save index.
8005 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8006 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8007 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8008 if (hasInlineStackProbe(MF))
8009 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8010 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8011}
8012
8013SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8014 SelectionDAG &DAG) const {
8015 MachineFunction &MF = DAG.getMachineFunction();
8016
8017 bool isPPC64 = Subtarget.isPPC64();
8018 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8019
8020 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8021 return DAG.getFrameIndex(FI, PtrVT);
8022}
8023
8024SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8025 SelectionDAG &DAG) const {
8026 SDLoc DL(Op);
8027 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8028 DAG.getVTList(MVT::i32, MVT::Other),
8029 Op.getOperand(0), Op.getOperand(1));
8030}
8031
8032SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8033 SelectionDAG &DAG) const {
8034 SDLoc DL(Op);
8035 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8036 Op.getOperand(0), Op.getOperand(1));
8037}
8038
8039SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8040 if (Op.getValueType().isVector())
8041 return LowerVectorLoad(Op, DAG);
8042
8043 assert(Op.getValueType() == MVT::i1 &&
8044 "Custom lowering only for i1 loads");
8045
8046 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8047
8048 SDLoc dl(Op);
8049 LoadSDNode *LD = cast<LoadSDNode>(Op);
8050
8051 SDValue Chain = LD->getChain();
8052 SDValue BasePtr = LD->getBasePtr();
8053 MachineMemOperand *MMO = LD->getMemOperand();
8054
8055 SDValue NewLD =
8056 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8057 BasePtr, MVT::i8, MMO);
8058 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8059
8060 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8061 return DAG.getMergeValues(Ops, dl);
8062}
8063
8064SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8065 if (Op.getOperand(1).getValueType().isVector())
8066 return LowerVectorStore(Op, DAG);
8067
8068 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8069 "Custom lowering only for i1 stores");
8070
8071 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8072
8073 SDLoc dl(Op);
8074 StoreSDNode *ST = cast<StoreSDNode>(Op);
8075
8076 SDValue Chain = ST->getChain();
8077 SDValue BasePtr = ST->getBasePtr();
8078 SDValue Value = ST->getValue();
8079 MachineMemOperand *MMO = ST->getMemOperand();
8080
8081   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8082                       Value);
8083 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8084}
8085
8086// FIXME: Remove this once the ANDI glue bug is fixed:
8087SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8088 assert(Op.getValueType() == MVT::i1 &&
8089 "Custom lowering only for i1 results");
8090
8091 SDLoc DL(Op);
8092 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8093}
8094
8095SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8096 SelectionDAG &DAG) const {
8097
8098 // Implements a vector truncate that fits in a vector register as a shuffle.
8099 // We want to legalize vector truncates down to where the source fits in
8100 // a vector register (and target is therefore smaller than vector register
8101 // size). At that point legalization will try to custom lower the sub-legal
8102 // result and get here - where we can contain the truncate as a single target
8103 // operation.
8104
8105 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8106 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8107 //
8108 // We will implement it for big-endian ordering as this (where x denotes
8109 // undefined):
8110 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8111 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8112 //
8113 // The same operation in little-endian ordering will be:
8114 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8115 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8116
8117 EVT TrgVT = Op.getValueType();
8118 assert(TrgVT.isVector() && "Vector type expected.");
8119 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8120 EVT EltVT = TrgVT.getVectorElementType();
8121 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8122 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8123       !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8124     return SDValue();
8125
8126 SDValue N1 = Op.getOperand(0);
8127 EVT SrcVT = N1.getValueType();
8128 unsigned SrcSize = SrcVT.getSizeInBits();
8129 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8130       !llvm::has_single_bit<uint32_t>(
8131           SrcVT.getVectorElementType().getSizeInBits()))
8132     return SDValue();
8133 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8134 return SDValue();
8135
8136 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8137 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8138
8139 SDLoc DL(Op);
8140 SDValue Op1, Op2;
8141 if (SrcSize == 256) {
8142 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8143 EVT SplitVT =
8144 SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8145 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8146 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8147 DAG.getConstant(0, DL, VecIdxTy));
8148 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8149 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8150 }
8151 else {
8152 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8153 Op2 = DAG.getUNDEF(WideVT);
8154 }
8155
8156 // First list the elements we want to keep.
8157 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8158 SmallVector<int, 16> ShuffV;
8159 if (Subtarget.isLittleEndian())
8160 for (unsigned i = 0; i < TrgNumElts; ++i)
8161 ShuffV.push_back(i * SizeMult);
8162 else
8163 for (unsigned i = 1; i <= TrgNumElts; ++i)
8164 ShuffV.push_back(i * SizeMult - 1);
8165
8166 // Populate the remaining elements with undefs.
8167 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8168 // ShuffV.push_back(i + WideNumElts);
8169 ShuffV.push_back(WideNumElts + 1);
8170
8171 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8172 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8173 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8174}
8175
8176/// LowerSELECT_CC - Lower floating-point select_cc's into an fsel instruction
8177/// when possible.
8178SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8179 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8180 EVT ResVT = Op.getValueType();
8181 EVT CmpVT = Op.getOperand(0).getValueType();
8182 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8183 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8184 SDLoc dl(Op);
8185
8186 // Without power9-vector, we don't have a native instruction for f128 comparison.
8187 // Following transformation to libcall is needed for setcc:
8188 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8189 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8190 SDValue Z = DAG.getSetCC(
8191 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8192 LHS, RHS, CC);
8193 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8194 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8195 }
8196
8197 // Not FP, or using SPE? Not a fsel.
8198 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8199 Subtarget.hasSPE())
8200 return Op;
8201
8202 SDNodeFlags Flags = Op.getNode()->getFlags();
8203
8204 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8205 // presence of infinities.
8206 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8207 switch (CC) {
8208 default:
8209 break;
8210 case ISD::SETOGT:
8211 case ISD::SETGT:
8212 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8213 case ISD::SETOLT:
8214 case ISD::SETLT:
8215 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8216 }
8217 }
8218
8219 // We might be able to do better than this under some circumstances, but in
8220 // general, fsel-based lowering of select is a finite-math-only optimization.
8221 // For more information, see section F.3 of the 2.06 ISA specification.
8222 // (With ISA 3.0, the xsmaxc/xsminc path above already covers the min/max cases.)
8223 if (!Flags.hasNoInfs() || !Flags.hasNoNaNs() || ResVT == MVT::f128)
8224 return Op;
8225
8226 // If the RHS of the comparison is a 0.0, we don't need to do the
8227 // subtraction at all.
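// (Recall the fsel semantics: fsel FRT, FRA, FRC, FRB sets FRT = FRC if
// FRA >= 0.0 and FRT = FRB otherwise, so every comparison below is rewritten
// in terms of ">= 0.0".)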
8228 SDValue Sel1;
8229 if (isFloatingPointZero(RHS))
8230 switch (CC) {
8231 default: break; // SETUO etc aren't handled by fsel.
8232 case ISD::SETNE:
8233 std::swap(TV, FV);
8234 [[fallthrough]];
8235 case ISD::SETEQ:
8236 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8237 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8238 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8239 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8240 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8241 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8242 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8243 case ISD::SETULT:
8244 case ISD::SETLT:
8245 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8246 [[fallthrough]];
8247 case ISD::SETOGE:
8248 case ISD::SETGE:
8249 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8250 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8251 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8252 case ISD::SETUGT:
8253 case ISD::SETGT:
8254 std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
8255 [[fallthrough]];
8256 case ISD::SETOLE:
8257 case ISD::SETLE:
8258 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8259 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8260 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8261 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8262 }
8263
8264 SDValue Cmp;
8265 switch (CC) {
8266 default: break; // SETUO etc aren't handled by fsel.
8267 case ISD::SETNE:
8268 std::swap(TV, FV);
8269 [[fallthrough]];
8270 case ISD::SETEQ:
8271 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8272 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8273 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8274 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8275 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8276 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8277 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8278 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8279 case ISD::SETULT:
8280 case ISD::SETLT:
8281 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8282 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8283 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8284 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8285 case ISD::SETOGE:
8286 case ISD::SETGE:
8287 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8288 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8289 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8290 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8291 case ISD::SETUGT:
8292 case ISD::SETGT:
8293 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8294 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8295 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8296 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8297 case ISD::SETOLE:
8298 case ISD::SETLE:
8299 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8300 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8301 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8302 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8303 }
8304 return Op;
8305}
8306
8307static unsigned getPPCStrictOpcode(unsigned Opc) {
8308 switch (Opc) {
8309 default:
8310 llvm_unreachable("No strict version of this opcode!");
8311 case PPCISD::FCTIDZ:
8312 return PPCISD::STRICT_FCTIDZ;
8313 case PPCISD::FCTIWZ:
8314 return PPCISD::STRICT_FCTIWZ;
8315 case PPCISD::FCTIDUZ:
8316 return PPCISD::STRICT_FCTIDUZ;
8317 case PPCISD::FCTIWUZ:
8318 return PPCISD::STRICT_FCTIWUZ;
8319 case PPCISD::FCFID:
8320 return PPCISD::STRICT_FCFID;
8321 case PPCISD::FCFIDU:
8322 return PPCISD::STRICT_FCFIDU;
8323 case PPCISD::FCFIDS:
8324 return PPCISD::STRICT_FCFIDS;
8325 case PPCISD::FCFIDUS:
8326 return PPCISD::STRICT_FCFIDUS;
8327 }
8328}
8329
8330static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8331 const PPCSubtarget &Subtarget) {
8332 SDLoc dl(Op);
8333 bool IsStrict = Op->isStrictFPOpcode();
8334 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8335 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8336
8337 // TODO: Any other flags to propagate?
8338 SDNodeFlags Flags;
8339 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8340
8341 // For strict nodes, source is the second operand.
8342 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8343 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8344 MVT DestTy = Op.getSimpleValueType();
8345 assert(Src.getValueType().isFloatingPoint() &&
8346 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8347 DestTy == MVT::i64) &&
8348 "Invalid FP_TO_INT types");
8349 if (Src.getValueType() == MVT::f32) {
8350 if (IsStrict) {
8351 Src =
8352 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8353 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8354 Chain = Src.getValue(1);
8355 } else
8356 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8357 }
8358 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8359 DestTy = Subtarget.getScalarIntVT();
8360 unsigned Opc = ISD::DELETED_NODE;
8361 switch (DestTy.SimpleTy) {
8362 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8363 case MVT::i32:
8364 Opc = IsSigned ? PPCISD::FCTIWZ
8365 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8366 break;
8367 case MVT::i64:
8368 assert((IsSigned || Subtarget.hasFPCVT()) &&
8369 "i64 FP_TO_UINT is supported only with FPCVT");
8370 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8371 }
8372 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8373 SDValue Conv;
8374 if (IsStrict) {
8375 Opc = getPPCStrictOpcode(Opc);
8376 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8377 Flags);
8378 } else {
8379 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8380 }
8381 return Conv;
8382}
8383
8384void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8385 SelectionDAG &DAG,
8386 const SDLoc &dl) const {
8387 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8388 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8389 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8390 bool IsStrict = Op->isStrictFPOpcode();
8391
8392 // Convert the FP value to an int value through memory.
8393 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8394 (IsSigned || Subtarget.hasFPCVT());
8395 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8396 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8397 MachinePointerInfo MPI =
8398 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8399
8400 // Emit a store to the stack slot.
8401 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8402 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8403 if (i32Stack) {
8404 MachineFunction &MF = DAG.getMachineFunction();
8405 Alignment = Align(4);
8406 MachineMemOperand *MMO =
8407 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8408 SDValue Ops[] = { Chain, Tmp, FIPtr };
8409 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8410 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8411 } else
8412 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8413
8414 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8415 // add in a bias on big-endian targets.
8416 if (Op.getValueType() == MVT::i32 && !i32Stack &&
8417 !Subtarget.isLittleEndian()) {
8418 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8419 DAG.getConstant(4, dl, FIPtr.getValueType()));
8420 MPI = MPI.getWithOffset(4);
8421 }
8422
8423 RLI.Chain = Chain;
8424 RLI.Ptr = FIPtr;
8425 RLI.MPI = MPI;
8426 RLI.Alignment = Alignment;
8427}
8428
8429/// Custom lowers floating point to integer conversions to use
8430/// the direct move instructions available in ISA 2.07 to avoid the
8431/// need for load/store combinations.
8432SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8433 SelectionDAG &DAG,
8434 const SDLoc &dl) const {
8435 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8436 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8437 if (Op->isStrictFPOpcode())
8438 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8439 else
8440 return Mov;
8441}
8442
8443SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8444 const SDLoc &dl) const {
8445 bool IsStrict = Op->isStrictFPOpcode();
8446 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8447 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8448 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8449 EVT SrcVT = Src.getValueType();
8450 EVT DstVT = Op.getValueType();
8451
8452 // FP to INT conversions are legal for f128.
8453 if (SrcVT == MVT::f128)
8454 return Subtarget.hasP9Vector() ? Op : SDValue();
8455
8456 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8457 // PPC (the libcall is not available).
8458 if (SrcVT == MVT::ppcf128) {
8459 if (DstVT == MVT::i32) {
8460 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8461 // set other fast-math flags to FP operations in both strict and
8462 // non-strict cases. (FP_TO_SINT, FSUB)
8463 SDNodeFlags Flags;
8464 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8465
8466 if (IsSigned) {
8467 SDValue Lo, Hi;
8468 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8469
8470 // Add the two halves of the long double in round-to-zero mode, and use
8471 // a smaller FP_TO_SINT.
8472 if (IsStrict) {
8473 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8474 DAG.getVTList(MVT::f64, MVT::Other),
8475 {Op.getOperand(0), Lo, Hi}, Flags);
8476 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8477 DAG.getVTList(MVT::i32, MVT::Other),
8478 {Res.getValue(1), Res}, Flags);
8479 } else {
8480 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8481 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8482 }
8483 } else {
8484 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
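// (0x41e0000000000000 is the IEEE-754 double encoding of 2^31; with a zero
// low word, the ppc_fp128 constant is exactly 2^31.)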
8485 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8486 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8487 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8488 if (IsStrict) {
8489 // Sel = Src < 0x80000000
8490 // FltOfs = select Sel, 0.0, 0x80000000
8491 // IntOfs = select Sel, 0, 0x80000000
8492 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8493 SDValue Chain = Op.getOperand(0);
8494 EVT SetCCVT =
8495 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8496 EVT DstSetCCVT =
8497 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8498 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8499 Chain, true);
8500 Chain = Sel.getValue(1);
8501
8502 SDValue FltOfs = DAG.getSelect(
8503 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8504 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8505
8506 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8507 DAG.getVTList(SrcVT, MVT::Other),
8508 {Chain, Src, FltOfs}, Flags);
8509 Chain = Val.getValue(1);
8510 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8511 DAG.getVTList(DstVT, MVT::Other),
8512 {Chain, Val}, Flags);
8513 Chain = SInt.getValue(1);
8514 SDValue IntOfs = DAG.getSelect(
8515 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8516 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8517 return DAG.getMergeValues({Result, Chain}, dl);
8518 } else {
8519 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8520 // FIXME: generated code sucks.
8521 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8522 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8523 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8524 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8525 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8526 }
8527 }
8528 }
8529
8530 return SDValue();
8531 }
8532
8533 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8534 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8535
8536 ReuseLoadInfo RLI;
8537 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8538
8539 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8540 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8541}
8542
8543// We're trying to insert a regular store, S, and then a load, L. If the
8544// incoming value, O, is a load, we might just be able to have our load use the
8545// address used by O. However, we don't know if anything else will store to
8546// that address before we can load from it. To prevent this situation, we need
8547// to insert our load, L, into the chain as a peer of O. To do this, we give L
8548// the same chain operand as O, we create a token factor from the chain results
8549// of O and L, and we replace all uses of O's chain result with that token
8550// factor (this last part is handled by makeEquivalentMemoryOrdering).
8551bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8552 ReuseLoadInfo &RLI,
8553 SelectionDAG &DAG,
8554 ISD::LoadExtType ET) const {
8555 // Conservatively skip reusing for constrained FP nodes.
8556 if (Op->isStrictFPOpcode())
8557 return false;
8558
8559 SDLoc dl(Op);
8560 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8561 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8562 if (ET == ISD::NON_EXTLOAD &&
8563 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8564 isOperationLegalOrCustom(Op.getOpcode(),
8565 Op.getOperand(0).getValueType())) {
8566
8567 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8568 return true;
8569 }
8570
8571 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8572 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8573 LD->isNonTemporal())
8574 return false;
8575 if (LD->getMemoryVT() != MemVT)
8576 return false;
8577
8578 // If the result of the load is an illegal type, then we can't build a
8579 // valid chain for reuse since the legalised loads and token factor node that
8580 // ties the legalised loads together uses a different output chain than the
8581 // illegal load.
8582 if (!isTypeLegal(LD->getValueType(0)))
8583 return false;
8584
8585 RLI.Ptr = LD->getBasePtr();
8586 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8587 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8588 "Non-pre-inc AM on PPC?");
8589 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8590 LD->getOffset());
8591 }
8592
8593 RLI.Chain = LD->getChain();
8594 RLI.MPI = LD->getPointerInfo();
8595 RLI.IsDereferenceable = LD->isDereferenceable();
8596 RLI.IsInvariant = LD->isInvariant();
8597 RLI.Alignment = LD->getAlign();
8598 RLI.AAInfo = LD->getAAInfo();
8599 RLI.Ranges = LD->getRanges();
8600
8601 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8602 return true;
8603}
8604
8605/// Analyze the profitability of a direct move: prefer a float load over an
8606/// int load plus a direct move when the loaded integer value has no integer
8607/// uses.
8608bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8609 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8610 if (Origin->getOpcode() != ISD::LOAD)
8611 return true;
8612
8613 // If there is no LXSIBZX/LXSIHZX, like Power8,
8614 // prefer direct move if the memory size is 1 or 2 bytes.
8615 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8616 if (!Subtarget.hasP9Vector() &&
8617 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8618 return true;
8619
8620 for (SDUse &Use : Origin->uses()) {
8621
8622 // Only look at the users of the loaded value.
8623 if (Use.getResNo() != 0)
8624 continue;
8625
8626 SDNode *User = Use.getUser();
8627 if (User->getOpcode() != ISD::SINT_TO_FP &&
8628 User->getOpcode() != ISD::UINT_TO_FP &&
8629 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8630 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8631 return true;
8632 }
8633
8634 return false;
8635}
8636
8637static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8638 const PPCSubtarget &Subtarget,
8639 SDValue Chain = SDValue()) {
8640 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8641 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8642 SDLoc dl(Op);
8643
8644 // TODO: Any other flags to propagate?
8645 SDNodeFlags Flags;
8646 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8647
8648 // If we have FCFIDS, then use it when converting to single-precision.
8649 // Otherwise, convert to double-precision and then round.
8650 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8651 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8652 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8653 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8654 if (Op->isStrictFPOpcode()) {
8655 if (!Chain)
8656 Chain = Op.getOperand(0);
8657 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8658 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8659 } else
8660 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8661}
8662
8663/// Custom lowers integer to floating point conversions to use
8664/// the direct move instructions available in ISA 2.07 to avoid the
8665/// need for load/store combinations.
8666SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8667 SelectionDAG &DAG,
8668 const SDLoc &dl) const {
8669 assert((Op.getValueType() == MVT::f32 ||
8670 Op.getValueType() == MVT::f64) &&
8671 "Invalid floating point type as target of conversion");
8672 assert(Subtarget.hasFPCVT() &&
8673 "Int to FP conversions with direct moves require FPCVT");
8674 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8675 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8676 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8677 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8678 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8679 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8680 return convertIntToFP(Op, Mov, DAG, Subtarget);
8681}
8682
8683static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8684
8685 EVT VecVT = Vec.getValueType();
8686 assert(VecVT.isVector() && "Expected a vector type.");
8687 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8688
8689 EVT EltVT = VecVT.getVectorElementType();
8690 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8691 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8692
8693 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8694 SmallVector<SDValue, 16> Ops(NumConcat);
8695 Ops[0] = Vec;
8696 SDValue UndefVec = DAG.getUNDEF(VecVT);
8697 for (unsigned i = 1; i < NumConcat; ++i)
8698 Ops[i] = UndefVec;
8699
8700 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8701}
8702
8703SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8704 const SDLoc &dl) const {
8705 bool IsStrict = Op->isStrictFPOpcode();
8706 unsigned Opc = Op.getOpcode();
8707 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8710 "Unexpected conversion type");
8711 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8712 "Supports conversions to v2f64/v4f32 only.");
8713
8714 // TODO: Any other flags to propagate?
8715 SDNodeFlags Flags;
8716 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8717
8718 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8719 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8720
8721 SDValue Wide = widenVec(DAG, Src, dl);
8722 EVT WideVT = Wide.getValueType();
8723 unsigned WideNumElts = WideVT.getVectorNumElements();
8724 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8725
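// Build a shuffle that moves each narrow source element into the low-order
// (little-endian) or high-order (big-endian) end of a wider lane; the other
// bytes of each lane come from ShuffleSrc2 below. E.g. widening four i8
// elements into v4i32 lanes on little-endian places source elements 0..3 at
// shuffle indices 0, 4, 8 and 12.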
8726 SmallVector<int, 16> ShuffV;
8727 for (unsigned i = 0; i < WideNumElts; ++i)
8728 ShuffV.push_back(i + WideNumElts);
8729
8730 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8731 int SaveElts = FourEltRes ? 4 : 2;
8732 if (Subtarget.isLittleEndian())
8733 for (int i = 0; i < SaveElts; i++)
8734 ShuffV[i * Stride] = i;
8735 else
8736 for (int i = 1; i <= SaveElts; i++)
8737 ShuffV[i * Stride - 1] = i - 1;
8738
8739 SDValue ShuffleSrc2 =
8740 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8741 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8742
8743 SDValue Extend;
8744 if (SignedConv) {
8745 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8746 EVT ExtVT = Src.getValueType();
8747 if (Subtarget.hasP9Altivec())
8748 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8749 IntermediateVT.getVectorNumElements());
8750
8751 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8752 DAG.getValueType(ExtVT));
8753 } else
8754 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8755
8756 if (IsStrict)
8757 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8758 {Op.getOperand(0), Extend}, Flags);
8759
8760 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8761}
8762
8763SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8764 SelectionDAG &DAG) const {
8765 SDLoc dl(Op);
8766 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8767 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8768 bool IsStrict = Op->isStrictFPOpcode();
8769 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8770 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8771
8772 // TODO: Any other flags to propagate?
8773 SDNodeFlags Flags;
8774 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8775
8776 EVT InVT = Src.getValueType();
8777 EVT OutVT = Op.getValueType();
8778 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8779 isOperationCustom(Op.getOpcode(), InVT))
8780 return LowerINT_TO_FPVector(Op, DAG, dl);
8781
8782 // Conversions to f128 are legal.
8783 if (Op.getValueType() == MVT::f128)
8784 return Subtarget.hasP9Vector() ? Op : SDValue();
8785
8786 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8787 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8788 return SDValue();
8789
8790 if (Src.getValueType() == MVT::i1) {
8791 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8792 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8793 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8794 if (IsStrict)
8795 return DAG.getMergeValues({Sel, Chain}, dl);
8796 else
8797 return Sel;
8798 }
8799
8800 // If we have direct moves, we can do the whole conversion and skip the
8801 // store/load; however, without FPCVT we can't do most conversions.
8802 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8803 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8804 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8805
8806 assert((IsSigned || Subtarget.hasFPCVT()) &&
8807 "UINT_TO_FP is supported only with FPCVT");
8808
8809 if (Src.getValueType() == MVT::i64) {
8810 SDValue SINT = Src;
8811 // When converting to single-precision, we actually need to convert
8812 // to double-precision first and then round to single-precision.
8813 // To avoid double-rounding effects during that operation, we have
8814 // to prepare the input operand. Bits that might be truncated when
8815 // converting to double-precision are replaced by a bit that won't
8816 // be lost at this stage, but is below the single-precision rounding
8817 // position.
8818 //
8819 // However, if afn is in effect, accept double
8820 // rounding to avoid the extra overhead.
8821 // FIXME: INT_TO_FP currently cannot carry fast-math flags (only the nneg
8822 // flag is representable), so Op->getFlags().hasApproximateFuncs() is
8823 // always false.
8824 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
8825 !Op->getFlags().hasApproximateFuncs()) {
8826
8827 // Twiddle input to make sure the low 11 bits are zero. (If this
8828 // is the case, we are guaranteed the value will fit into the 53 bit
8829 // mantissa of an IEEE double-precision value without rounding.)
8830 // If any of those low 11 bits were not zero originally, make sure
8831 // bit 12 (value 2048) is set instead, so that the final rounding
8832 // to single-precision gets the correct result.
8833 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8834 SINT, DAG.getConstant(2047, dl, MVT::i64));
8835 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8836 Round, DAG.getConstant(2047, dl, MVT::i64));
8837 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8838 Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
8839 DAG.getSignedConstant(-2048, dl, MVT::i64));
8840
8841 // However, we cannot use that value unconditionally: if the magnitude
8842 // of the input value is small, the bit-twiddling we did above might
8843 // end up visibly changing the output. Fortunately, in that case, we
8844 // don't need to twiddle bits since the original input will convert
8845 // exactly to double-precision floating-point already. Therefore,
8846 // construct a conditional to use the original value if the top 11
8847 // bits are all sign-bit copies, and use the rounded value computed
8848 // above otherwise.
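// ((SINT >> 53) + 1 >u 1) is false exactly when bits 63..53 of SINT are all
// copies of the sign bit: the arithmetic shift then yields 0 or -1, and
// adding 1 gives 1 or 0.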
8849 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8850 SINT, DAG.getConstant(53, dl, MVT::i32));
8851 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8852 Cond, DAG.getConstant(1, dl, MVT::i64));
8853 Cond = DAG.getSetCC(
8854 dl,
8855 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8856 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8857
8858 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8859 }
8860
8861 ReuseLoadInfo RLI;
8862 SDValue Bits;
8863
8864 MachineFunction &MF = DAG.getMachineFunction();
8865 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8866 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8867 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8868 if (RLI.ResChain)
8869 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8870 } else if (Subtarget.hasLFIWAX() &&
8871 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8872 MachineMemOperand *MMO =
8873 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8874 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8875 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8876 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8877 DAG.getVTList(MVT::f64, MVT::Other),
8878 Ops, MVT::i32, MMO);
8879 if (RLI.ResChain)
8880 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8881 } else if (Subtarget.hasFPCVT() &&
8882 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8883 MachineMemOperand *MMO =
8884 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8885 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8886 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8887 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8888 DAG.getVTList(MVT::f64, MVT::Other),
8889 Ops, MVT::i32, MMO);
8890 if (RLI.ResChain)
8891 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8892 } else if (((Subtarget.hasLFIWAX() &&
8893 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8894 (Subtarget.hasFPCVT() &&
8895 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8896 SINT.getOperand(0).getValueType() == MVT::i32) {
8897 MachineFrameInfo &MFI = MF.getFrameInfo();
8898 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8899
8900 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8901 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8902
8903 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8904 MachinePointerInfo::getFixedStack(
8905 DAG.getMachineFunction(), FrameIdx));
8906 Chain = Store;
8907
8908 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8909 "Expected an i32 store");
8910
8911 RLI.Ptr = FIdx;
8912 RLI.Chain = Chain;
8913 RLI.MPI =
8914 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8915 RLI.Alignment = Align(4);
8916
8917 MachineMemOperand *MMO =
8918 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8919 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8920 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8921 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8922 PPCISD::LFIWZX : PPCISD::LFIWAX,
8923 dl, DAG.getVTList(MVT::f64, MVT::Other),
8924 Ops, MVT::i32, MMO);
8925 Chain = Bits.getValue(1);
8926 } else
8927 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8928
8929 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8930 if (IsStrict)
8931 Chain = FP.getValue(1);
8932
8933 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8934 if (IsStrict)
8935 FP = DAG.getNode(
8936 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8937 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
8938 Flags);
8939 else
8940 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8941 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8942 }
8943 return FP;
8944 }
8945
8946 assert(Src.getValueType() == MVT::i32 &&
8947 "Unhandled INT_TO_FP type in custom expander!");
8948 // Since we only generate this in 64-bit mode, we can take advantage of
8949 // 64-bit registers. In particular, sign extend the input value into the
8950 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8951 // slot, then lfd it and fcfid it.
8952 MachineFunction &MF = DAG.getMachineFunction();
8953 MachineFrameInfo &MFI = MF.getFrameInfo();
8954 EVT PtrVT = getPointerTy(MF.getDataLayout());
8955
8956 SDValue Ld;
8957 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8958 ReuseLoadInfo RLI;
8959 bool ReusingLoad;
8960 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8961 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8962 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8963
8964 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8965 MachinePointerInfo::getFixedStack(
8966 DAG.getMachineFunction(), FrameIdx));
8967 Chain = Store;
8968
8969 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8970 "Expected an i32 store");
8971
8972 RLI.Ptr = FIdx;
8973 RLI.Chain = Chain;
8974 RLI.MPI =
8975 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8976 RLI.Alignment = Align(4);
8977 }
8978
8979 MachineMemOperand *MMO =
8980 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8981 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8982 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8983 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8984 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8985 MVT::i32, MMO);
8986 Chain = Ld.getValue(1);
8987 if (ReusingLoad && RLI.ResChain) {
8988 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
8989 }
8990 } else {
8991 assert(Subtarget.isPPC64() &&
8992 "i32->FP without LFIWAX supported only on PPC64");
8993
8994 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8995 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8996
8997 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8998
8999 // STD the extended value into the stack slot.
9000 SDValue Store = DAG.getStore(
9001 Chain, dl, Ext64, FIdx,
9002 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9003 Chain = Store;
9004
9005 // Load the value as a double.
9006 Ld = DAG.getLoad(
9007 MVT::f64, dl, Chain, FIdx,
9008 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9009 Chain = Ld.getValue(1);
9010 }
9011
9012 // FCFID it and return it.
9013 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9014 if (IsStrict)
9015 Chain = FP.getValue(1);
9016 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9017 if (IsStrict)
9018 FP = DAG.getNode(
9019 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9020 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
9021 else
9022 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9023 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9024 }
9025 return FP;
9026}
9027
9028SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9029 SelectionDAG &DAG) const {
9030 SDLoc Dl(Op);
9031 MachineFunction &MF = DAG.getMachineFunction();
9032 EVT PtrVT = getPointerTy(MF.getDataLayout());
9033 SDValue Chain = Op.getOperand(0);
9034
9035 // If the requested mode is a constant, just use the simpler mtfsb/mffscrni.
9036 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9037 uint64_t Mode = CVal->getZExtValue();
9038 assert(Mode < 4 && "Unsupported rounding mode!");
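// Map the LLVM rounding-mode encoding (0 = toward zero, 1 = nearest,
// 2 = +inf, 3 = -inf) to the Power RN encoding (0 = nearest, 1 = toward
// zero, 2 = +inf, 3 = -inf): 0 -> 1, 1 -> 0, 2 -> 2, 3 -> 3.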
9039 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9040 if (Subtarget.isISA3_0())
9041 return SDValue(
9042 DAG.getMachineNode(
9043 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9044 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9045 1);
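// Without mffscrni, set the two RN bits (FPSCR bits 30 and 31)
// individually with mtfsb0/mtfsb1.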
9046 SDNode *SetHi = DAG.getMachineNode(
9047 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9048 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9049 SDNode *SetLo = DAG.getMachineNode(
9050 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9051 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9052 return SDValue(SetLo, 0);
9053 }
9054
9055 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
9056 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9057 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9058 DAG.getConstant(3, Dl, MVT::i32));
9059 SDValue DstFlag = DAG.getNode(
9060 ISD::XOR, Dl, MVT::i32, SrcFlag,
9061 DAG.getNode(ISD::AND, Dl, MVT::i32,
9062 DAG.getNOT(Dl,
9063 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9064 MVT::i32),
9065 One));
9066 // Power9 has the faster mffscrn, so we don't need to read the FPSCR first.
9067 SDValue MFFS;
9068 if (!Subtarget.isISA3_0()) {
9069 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9070 Chain = MFFS.getValue(1);
9071 }
9072 SDValue NewFPSCR;
9073 if (Subtarget.isPPC64()) {
9074 if (Subtarget.isISA3_0()) {
9075 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9076 } else {
9077 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9078 SDNode *InsertRN = DAG.getMachineNode(
9079 PPC::RLDIMI, Dl, MVT::i64,
9080 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9081 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9082 DAG.getTargetConstant(0, Dl, MVT::i32),
9083 DAG.getTargetConstant(62, Dl, MVT::i32)});
9084 NewFPSCR = SDValue(InsertRN, 0);
9085 }
9086 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9087 } else {
9088 // In 32-bit mode, store f64, load and update the lower half.
9089 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9090 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9091 SDValue Addr = Subtarget.isLittleEndian()
9092 ? StackSlot
9093 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9094 DAG.getConstant(4, Dl, PtrVT));
9095 if (Subtarget.isISA3_0()) {
9096 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9097 } else {
9098 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9099 SDValue Tmp =
9100 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9101 Chain = Tmp.getValue(1);
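// RLWIMI with SH=0, MB=30, ME=31 splices the two rounding-mode bits of
// DstFlag into the loaded word, leaving all other bits unchanged.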
9102 Tmp = SDValue(DAG.getMachineNode(
9103 PPC::RLWIMI, Dl, MVT::i32,
9104 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9105 DAG.getTargetConstant(30, Dl, MVT::i32),
9106 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9107 0);
9108 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9109 }
9110 NewFPSCR =
9111 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9112 Chain = NewFPSCR.getValue(1);
9113 }
9114 if (Subtarget.isISA3_0())
9115 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9116 {NewFPSCR, Chain}),
9117 1);
9118 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9119 SDNode *MTFSF = DAG.getMachineNode(
9120 PPC::MTFSF, Dl, MVT::Other,
9121 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9122 return SDValue(MTFSF, 0);
9123}
9124
9125SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9126 SelectionDAG &DAG) const {
9127 SDLoc dl(Op);
9128 /*
9129 The rounding mode is in bits 30:31 of FPSCR, and has the following
9130 settings:
9131 00 Round to nearest
9132 01 Round to 0
9133 10 Round to +inf
9134 11 Round to -inf
9135
9136 GET_ROUNDING, on the other hand, expects the following:
9137 -1 Undefined
9138 0 Round to 0
9139 1 Round to nearest
9140 2 Round to +inf
9141 3 Round to -inf
9142
9143 To perform the conversion, we do:
9144 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
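For example: FPSCR bits 00 (nearest) -> 0 ^ (3 >> 1) = 1, bits 01
(round to 0) -> 1 ^ (2 >> 1) = 0, bits 10 (+inf) -> 2, bits 11 (-inf) -> 3.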
9145 */
9146
9147 MachineFunction &MF = DAG.getMachineFunction();
9148 EVT VT = Op.getValueType();
9149 EVT PtrVT = getPointerTy(MF.getDataLayout());
9150
9151 // Save FP Control Word to register
9152 SDValue Chain = Op.getOperand(0);
9153 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9154 Chain = MFFS.getValue(1);
9155
9156 SDValue CWD;
9157 if (isTypeLegal(MVT::i64)) {
9158 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9159 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9160 } else {
9161 // Save FP register to stack slot
9162 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9163 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9164 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9165
9166 // Load FP Control Word from low 32 bits of stack slot.
9168 "Stack slot adjustment is valid only on big endian subtargets!");
9169 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9170 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9171 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9172 Chain = CWD.getValue(1);
9173 }
9174
9175 // Transform as necessary
9176 SDValue CWD1 =
9177 DAG.getNode(ISD::AND, dl, MVT::i32,
9178 CWD, DAG.getConstant(3, dl, MVT::i32));
9179 SDValue CWD2 =
9180 DAG.getNode(ISD::SRL, dl, MVT::i32,
9181 DAG.getNode(ISD::AND, dl, MVT::i32,
9182 DAG.getNode(ISD::XOR, dl, MVT::i32,
9183 CWD, DAG.getConstant(3, dl, MVT::i32)),
9184 DAG.getConstant(3, dl, MVT::i32)),
9185 DAG.getConstant(1, dl, MVT::i32));
9186
9187 SDValue RetVal =
9188 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9189
9190 RetVal =
9191 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9192 dl, VT, RetVal);
9193
9194 return DAG.getMergeValues({RetVal, Chain}, dl);
9195}
9196
9197SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9198 EVT VT = Op.getValueType();
9199 uint64_t BitWidth = VT.getSizeInBits();
9200 SDLoc dl(Op);
9201 assert(Op.getNumOperands() == 3 &&
9202 VT == Op.getOperand(1).getValueType() &&
9203 "Unexpected SHL!");
9204
9205 // Expand into a bunch of logical ops. Note that these ops
9206 // depend on the PPC behavior for oversized shift amounts.
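// E.g. with BitWidth == 32 and Amt == 40: Tmp2 and Tmp3 see 6-bit shift
// amounts of 40 and 56 (both >= 32) and so produce 0, while Tmp5 == 8 makes
// OutHi == Lo << 8 and OutLo == 0, the expected result of a 64-bit left
// shift by 40.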
9207 SDValue Lo = Op.getOperand(0);
9208 SDValue Hi = Op.getOperand(1);
9209 SDValue Amt = Op.getOperand(2);
9210 EVT AmtVT = Amt.getValueType();
9211
9212 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9213 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9214 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9215 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9216 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9217 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9218 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9219 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9220 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9221 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9222 SDValue OutOps[] = { OutLo, OutHi };
9223 return DAG.getMergeValues(OutOps, dl);
9224}
9225
9226SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9227 EVT VT = Op.getValueType();
9228 SDLoc dl(Op);
9229 uint64_t BitWidth = VT.getSizeInBits();
9230 assert(Op.getNumOperands() == 3 &&
9231 VT == Op.getOperand(1).getValueType() &&
9232 "Unexpected SRL!");
9233
9234 // Expand into a bunch of logical ops. Note that these ops
9235 // depend on the PPC behavior for oversized shift amounts.
9236 SDValue Lo = Op.getOperand(0);
9237 SDValue Hi = Op.getOperand(1);
9238 SDValue Amt = Op.getOperand(2);
9239 EVT AmtVT = Amt.getValueType();
9240
9241 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9242 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9243 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9244 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9245 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9246 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9247 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9248 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9249 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9250 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9251 SDValue OutOps[] = { OutLo, OutHi };
9252 return DAG.getMergeValues(OutOps, dl);
9253}
9254
9255SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9256 SDLoc dl(Op);
9257 EVT VT = Op.getValueType();
9258 uint64_t BitWidth = VT.getSizeInBits();
9259 assert(Op.getNumOperands() == 3 &&
9260 VT == Op.getOperand(1).getValueType() &&
9261 "Unexpected SRA!");
9262
9263 // Expand into a bunch of logical ops, followed by a select_cc.
9264 SDValue Lo = Op.getOperand(0);
9265 SDValue Hi = Op.getOperand(1);
9266 SDValue Amt = Op.getOperand(2);
9267 EVT AmtVT = Amt.getValueType();
9268
9269 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9270 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9271 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9272 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9273 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9274 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9275 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9276 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9277 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
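// A select (rather than the OR used in LowerSRL_PARTS above) is needed for
// the low part: when Amt <= BitWidth, an oversized SRA fills Tmp6 with sign
// bits rather than zeros, so Tmp6 cannot simply be OR'ed in.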
9278 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9279 Tmp4, Tmp6, ISD::SETLE);
9280 SDValue OutOps[] = { OutLo, OutHi };
9281 return DAG.getMergeValues(OutOps, dl);
9282}
9283
9284SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9285 SelectionDAG &DAG) const {
9286 SDLoc dl(Op);
9287 EVT VT = Op.getValueType();
9288 unsigned BitWidth = VT.getSizeInBits();
9289
9290 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9291 SDValue X = Op.getOperand(0);
9292 SDValue Y = Op.getOperand(1);
9293 SDValue Z = Op.getOperand(2);
9294 EVT AmtVT = Z.getValueType();
9295
9296 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9297 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9298 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9299 // on PowerPC shift by BW being well defined.
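// E.g. for fshl with Z % BW == 0, SubZ == BW and the PPC SRL by BW yields
// 0, so the result is X | 0 == X; no special case for a zero shift amount
// is needed.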
9300 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9301 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9302 SDValue SubZ =
9303 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9304 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9305 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9306 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9307}
9308
9309//===----------------------------------------------------------------------===//
9310// Vector related lowering.
9311//
9312
9313/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9314/// element size of SplatSize. Cast the result to VT.
9315static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9316 SelectionDAG &DAG, const SDLoc &dl) {
9317 static const MVT VTys[] = { // canonical VT to use for each size.
9318 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9319 };
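// (Index 2, i.e. SplatSize == 3, is not a valid splat size, hence the
// MVT::Other placeholder.)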
9320
9321 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9322
9323 // For a splat with all ones, turn it into vspltisb 0xFF to canonicalize.
9324 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9325 SplatSize = 1;
9326 Val = 0xFF;
9327 }
9328
9329 EVT CanonicalVT = VTys[SplatSize-1];
9330
9331 // Build a canonical splat for this value.
9332 // Explicitly truncate APInt here, as this API is used with a mix of
9333 // signed and unsigned values.
9334 return DAG.getBitcast(
9335 ReqVT,
9336 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9337}
9338
9339/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9340/// specified intrinsic ID.
9341static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9342 const SDLoc &dl, EVT DestVT = MVT::Other) {
9343 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9344 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9345 DAG.getConstant(IID, dl, MVT::i32), Op);
9346}
9347
9348/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9349/// specified intrinsic ID.
9350static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9351 SelectionDAG &DAG, const SDLoc &dl,
9352 EVT DestVT = MVT::Other) {
9353 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9354 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9355 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9356}
9357
9358/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9359/// specified intrinsic ID.
9360static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9361 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9362 EVT DestVT = MVT::Other) {
9363 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9364 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9365 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9366}
9367
9368/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9369/// amount. The result has the specified value type.
9370static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9371 SelectionDAG &DAG, const SDLoc &dl) {
9372 // Force LHS/RHS to be the right type.
9373 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9374 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9375
9376 int Ops[16];
9377 for (unsigned i = 0; i != 16; ++i)
9378 Ops[i] = i + Amt;
9379 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9380 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9381}
9382
9383/// Do we have an efficient pattern in a .td file for this node?
9384///
9385/// \param V - pointer to the BuildVectorSDNode being matched
9386/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9387///
9388/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9389/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9390/// the opposite is true (expansion is beneficial) are:
9391/// - The node builds a vector out of integers that are not 32 or 64-bits
9392/// - The node builds a vector out of constants
9393/// - The node is a "load-and-splat"
9394/// In all other cases, we will choose to keep the BUILD_VECTOR.
9395static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9396 bool HasDirectMove,
9397 bool HasP8Vector) {
9398 EVT VecVT = V->getValueType(0);
9399 bool RightType = VecVT == MVT::v2f64 ||
9400 (HasP8Vector && VecVT == MVT::v4f32) ||
9401 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9402 if (!RightType)
9403 return false;
9404
9405 bool IsSplat = true;
9406 bool IsLoad = false;
9407 SDValue Op0 = V->getOperand(0);
9408
9409 // This function is called in a block that confirms the node is not a constant
9410 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9411 // different constants.
9412 if (V->isConstant())
9413 return false;
9414 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9415 if (V->getOperand(i).isUndef())
9416 return false;
9417 // We want to expand nodes that represent load-and-splat even if the
9418 // loaded value is a floating point truncation or conversion to int.
9419 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9420 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9421 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9422 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9423 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9424 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9425 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9426 IsLoad = true;
9427 // If the operands are different or the input is not a load and has more
9428 // uses than just this BV node, then it isn't a splat.
9429 if (V->getOperand(i) != Op0 ||
9430 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9431 IsSplat = false;
9432 }
9433 return !(IsSplat && IsLoad);
9434}
9435
9436// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9437SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9438
9439 SDLoc dl(Op);
9440 SDValue Op0 = Op->getOperand(0);
9441
9442 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9443 (Op.getValueType() != MVT::f128))
9444 return SDValue();
9445
9446 SDValue Lo = Op0.getOperand(0);
9447 SDValue Hi = Op0.getOperand(1);
9448 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9449 return SDValue();
9450
9451 if (!Subtarget.isLittleEndian())
9452 std::swap(Lo, Hi);
9453
9454 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9455}
9456
9457static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9458 const SDValue *InputLoad = &Op;
9459 while (InputLoad->getOpcode() == ISD::BITCAST)
9460 InputLoad = &InputLoad->getOperand(0);
9461 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9462 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9463 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9464 InputLoad = &InputLoad->getOperand(0);
9465 }
9466 if (InputLoad->getOpcode() != ISD::LOAD)
9467 return nullptr;
9468 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9469 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9470}
9471
9472// Convert the argument APFloat to a single-precision APFloat if the
9473// conversion loses no information and the resulting number is not a
9474// denormal. Return true if successful.
9475bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9476 APFloat APFloatToConvert = ArgAPFloat;
9477 bool LosesInfo = true;
9478 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9479 &LosesInfo);
9480 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9481 if (Success)
9482 ArgAPFloat = APFloatToConvert;
9483 return Success;
9484}
9485
9486// Bitcast the argument APInt to a double and convert it to a single-precision
9487// APFloat. If the conversion from double to single precision loses no
9488// information and the result is not a denormal, bitcast the APFloat back to
9489// an APInt and assign it to the original argument.
9490// Return true if successful.
9491bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9492 double DpValue = ArgAPInt.bitsToDouble();
9493 APFloat APFloatDp(DpValue);
9494 bool Success = convertToNonDenormSingle(APFloatDp);
9495 if (Success)
9496 ArgAPInt = APFloatDp.bitcastToAPInt();
9497 return Success;
9498}
9499
9500// Nondestructive check for convertToNonDenormSingle.
9501bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9502 // Only convert if it loses info, since XXSPLTIDP should
9503 // handle the other case.
9504 APFloat APFloatToConvert = ArgAPFloat;
9505 bool LosesInfo = true;
9506 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9507 &LosesInfo);
9508
9509 return (!LosesInfo && !APFloatToConvert.isDenormal());
9510}
9511
9512static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9513 unsigned &Opcode) {
9514 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9515 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9516 return false;
9517
9518 EVT Ty = Op->getValueType(0);
9519 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9520 // as we cannot handle extending loads for these types.
9521 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9522 ISD::isNON_EXTLoad(InputNode))
9523 return true;
9524
9525 EVT MemVT = InputNode->getMemoryVT();
9526 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9527 // memory VT is the same vector element VT type.
9528 // The loads feeding into the v8i16 and v16i8 types will be extending because
9529 // scalar i8/i16 are not legal types.
9530 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9531 (MemVT == Ty.getVectorElementType()))
9532 return true;
9533
9534 if (Ty == MVT::v2i64) {
9535 // Check the extend type, when the input type is i32, and the output vector
9536 // type is v2i64.
9537 if (MemVT == MVT::i32) {
9538 if (ISD::isZEXTLoad(InputNode))
9539 Opcode = PPCISD::ZEXT_LD_SPLAT;
9540 if (ISD::isSEXTLoad(InputNode))
9541 Opcode = PPCISD::SEXT_LD_SPLAT;
9542 }
9543 return true;
9544 }
9545 return false;
9546}
9547
9548static bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
9549 bool IsLittleEndian) {
9550 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9551
9552 BitMask.clearAllBits();
9553 EVT VT = BVN.getValueType(0);
9554 unsigned VTSize = VT.getSizeInBits();
9555 APInt ConstValue(VTSize, 0);
9556
9557 unsigned EltWidth = VT.getScalarSizeInBits();
9558
9559 unsigned BitPos = 0;
9560 for (auto OpVal : BVN.op_values()) {
9561 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9562
9563 if (!CN)
9564 return false;
9565 // The elements in a vector register are ordered in reverse byte order
9566 // between little-endian and big-endian modes.
9567 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9568 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9569 BitPos += EltWidth;
9570 }
9571
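// MTVSRBMI can only materialize constants in which every byte is 0x00 or
// 0xFF; mask bit J selects an all-ones byte J.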
9572 for (unsigned J = 0; J < 16; ++J) {
9573 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9574 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9575 return false;
9576 if (ExtractValue == 0xFF)
9577 BitMask.setBit(J);
9578 }
9579 return true;
9580}
9581
9582// If this is a case we can't handle, return null and let the default
9583// expansion code take care of it. If we CAN select this case, and if it
9584// selects to a single instruction, return Op. Otherwise, if we can codegen
9585// this case more efficiently than a constant pool load, lower it to the
9586// sequence of ops that should be used.
9587SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9588 SelectionDAG &DAG) const {
9589 SDLoc dl(Op);
9590 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9591 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9592
9593 if (Subtarget.hasP10Vector()) {
9594 APInt BitMask(32, 0);
9595 // If the value of the vector is all zeros or all ones,
9596 // we do not convert it to MTVSRBMI.
9597 // The xxleqv instruction sets a vector with all ones.
9598 // The xxlxor instruction sets a vector with all zeros.
9599 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9600 BitMask != 0 && BitMask != 0xffff) {
9601 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9602 MachineSDNode *MSDNode =
9603 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9604 SDValue SDV = SDValue(MSDNode, 0);
9605 EVT DVT = BVN->getValueType(0);
9606 EVT SVT = SDV.getValueType();
9607 if (SVT != DVT) {
9608 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9609 }
9610 return SDV;
9611 }
9612 // Recognize build vector patterns to emit VSX vector instructions
9613 // instead of loading value from memory.
9614 if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9615 return VecPat;
9616 }
9617 // Check if this is a splat of a constant value.
9618 APInt APSplatBits, APSplatUndef;
9619 unsigned SplatBitSize;
9620 bool HasAnyUndefs;
9621 bool BVNIsConstantSplat =
9622 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9623 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9624
9625 // If it is a splat of a double, check if we can shrink it to a 32 bit
9626 // non-denormal float which when converted back to double gives us the same
9627 // double. This is to exploit the XXSPLTIDP instruction.
9628 // If we lose precision, we use XXSPLTI32DX.
9629 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9630 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9631 // Check the type first to short-circuit so we don't modify APSplatBits if
9632 // this block isn't executed.
9633 if ((Op->getValueType(0) == MVT::v2f64) &&
9634 convertToNonDenormSingle(APSplatBits)) {
9635 SDValue SplatNode = DAG.getNode(
9636 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9637 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9638 return DAG.getBitcast(Op.getValueType(), SplatNode);
9639 } else {
9640 // We may lose precision, so we have to use XXSPLTI32DX.
9641
9642 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9643 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9644 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9645
9646 if (!Hi || !Lo)
9647 // If either 32-bit half is 0, generate XXLXOR to zero the register first.
9648 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9649
9650 if (Hi)
9651 SplatNode = DAG.getNode(
9652 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9653 DAG.getTargetConstant(0, dl, MVT::i32),
9654 DAG.getTargetConstant(Hi, dl, MVT::i32));
9655
9656 if (Lo)
9657 SplatNode =
9658 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9659 DAG.getTargetConstant(1, dl, MVT::i32),
9660 DAG.getTargetConstant(Lo, dl, MVT::i32));
9661
9662 return DAG.getBitcast(Op.getValueType(), SplatNode);
9663 }
9664 }
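// Worked example (editorial, not part of the original source): a v2f64
// splat of 1.0 (0x3FF0000000000000) narrows losslessly to single
// 0x3F800000, so the first branch emits one XXSPLTI_SP_TO_DP with that
// immediate. A splat of 0.1 (0x3FB999999999999A) is lossy, so the else
// branch emits two XXSPLTI32DX nodes writing Hi = 0x3FB99999 (index 0)
// and Lo = 0x9999999A (index 1).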
9665
9666 bool IsSplat64 = false;
9667 uint64_t SplatBits = 0;
9668 int32_t SextVal = 0;
9669 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9670 SplatBits = APSplatBits.getZExtValue();
9671 if (SplatBitSize <= 32) {
9672 SextVal = SignExtend32(SplatBits, SplatBitSize);
9673 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9674 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9675 bool P9Vector = Subtarget.hasP9Vector();
9676 int32_t Hi = P9Vector ? 127 : 15;
9677 int32_t Lo = P9Vector ? -128 : -16;
9678 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9679 SextVal = static_cast<int32_t>(SplatBits);
9680 }
9681 }
9682
9683 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9684 unsigned NewOpcode = PPCISD::LD_SPLAT;
9685
9686 // Handle load-and-splat patterns as we have instructions that will do this
9687 // in one go.
9688 if (DAG.isSplatValue(Op, true) &&
9689 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9690 const SDValue *InputLoad = &Op.getOperand(0);
9691 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9692
9693 // If the input load is an extending load, it will be an i32 -> i64
9694 // extending load and isValidSplatLoad() will update NewOpcode.
9695 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9696 unsigned ElementSize =
9697 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9698
9699 assert(((ElementSize == 2 * MemorySize)
9700 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9701 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9702 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9703 "Unmatched element size and opcode!\n");
9704
9705 // To check for a single use of this load, we have to check for vector
9706 // width (128 bits) / ElementSize uses (since each operand of the
9707 // BUILD_VECTOR is a separate use of the value).
9708 unsigned NumUsesOfInputLD = 128 / ElementSize;
9709 for (SDValue BVInOp : Op->ops())
9710 if (BVInOp.isUndef())
9711 NumUsesOfInputLD--;
9712
9713 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9714 // The cases below would also arise for "lfiwzx/lfiwax + LE target + index
9715 // 1", "lxvrhx + BE target + index 7", and "lxvrbx + BE target + index
9716 // 15", but isValidSplatLoad() currently returns true only when the data
9717 // at index 0 is not nullptr, so we will not get into trouble for these
9718 // cases.
9719 //
9720 // case 1 - lfiwzx/lfiwax
9721 // 1.1: load result is i32 and is sign/zero extend to i64;
9722 // 1.2: build a v2i64 vector type with above loaded value;
9723 // 1.3: the vector has only one value at index 0, others are all undef;
9724 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9725 if (NumUsesOfInputLD == 1 &&
9726 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9727 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9728 Subtarget.hasLFIWAX()))
9729 return SDValue();
9730
9731 // case 2 - lxvr[hb]x
9732 // 2.1: load result is at most i16;
9733 // 2.2: build a vector with above loaded value;
9734 // 2.3: the vector has only one value at index 0, others are all undef;
9735 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9736 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9737 Subtarget.isISA3_1() && ElementSize <= 16)
9738 return SDValue();
9739
9740 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9741 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9742 Subtarget.hasVSX()) {
9743 SDValue Ops[] = {
9744 LD->getChain(), // Chain
9745 LD->getBasePtr(), // Ptr
9746 DAG.getValueType(Op.getValueType()) // VT
9747 };
9748 SDValue LdSplt = DAG.getMemIntrinsicNode(
9749 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9750 LD->getMemoryVT(), LD->getMemOperand());
9751 // Replace all uses of the output chain of the original load with the
9752 // output chain of the new load.
9753 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9754 LdSplt.getValue(1));
9755 return LdSplt;
9756 }
9757 }
9758
9759 // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
9760 // 32-bits can be lowered to VSX instructions under certain conditions.
9761 // Without VSX, there is no pattern more efficient than expanding the node.
9762 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9763 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9764 Subtarget.hasP8Vector()))
9765 return Op;
9766 return SDValue();
9767 }
9768
9769 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9770 unsigned SplatSize = SplatBitSize / 8;
9771
9772 // First, handle single instruction cases.
9773
9774 // All zeros?
9775 if (SplatBits == 0) {
9776 // Canonicalize all zero vectors to be v4i32.
9777 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9778 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9779 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9780 }
9781 return Op;
9782 }
9783
9784 // We have XXSPLTIW for constant splats four bytes wide.
9785 // Since the vector length is a multiple of 4, 2-byte splats can be replaced
9786 // with 4-byte splats: we replicate SplatBits to make a 4-byte splat element.
9787 // For example, a 2-byte splat of 0xABAB becomes a 4-byte splat of
9788 // 0xABABABAB.
9789 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9790 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9791 Op.getValueType(), DAG, dl);
9792
9793 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9794 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9795 dl);
9796
9797 // We have XXSPLTIB for constant splats one byte wide.
9798 if (Subtarget.hasP9Vector() && SplatSize == 1)
9799 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9800 dl);
9801
9802 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9803 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9804 if (SextVal >= -16 && SextVal <= 15) {
9805 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9806 // generate a splat word with extend for size 8.
9807 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9808 SDValue Res =
9809 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9810 if (SplatSize != 8)
9811 return Res;
9812 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
9813 }
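// Worked example (editorial, not part of the original source): a v2i64
// splat of 5 has SplatSize == 8, so UseSize becomes 4 and we first build a
// v4i32 splat with vspltisw 5, then vupklsw sign-extends the low words into
// the desired v2i64 splat, two instructions in total.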
9814
9815 // Two instruction sequences.
9816
9817 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9818 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9819 SmallVector<SDValue, 16> Ops(16, C);
9820 SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9821 unsigned IID;
9822 EVT VT;
9823 switch (SplatSize) {
9824 default:
9825 llvm_unreachable("Unexpected type for vector constant.");
9826 case 2:
9827 IID = Intrinsic::ppc_altivec_vupklsb;
9828 VT = MVT::v8i16;
9829 break;
9830 case 4:
9831 IID = Intrinsic::ppc_altivec_vextsb2w;
9832 VT = MVT::v4i32;
9833 break;
9834 case 8:
9835 IID = Intrinsic::ppc_altivec_vextsb2d;
9836 VT = MVT::v2i64;
9837 break;
9838 }
9839 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9840 return DAG.getBitcast(Op->getValueType(0), Extend);
9841 }
9842 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9843
9844 // If this value is in the range [-32,30] and is even, use:
9845 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9846 // If this value is in the range [17,31] and is odd, use:
9847 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9848 // If this value is in the range [-31,-17] and is odd, use:
9849 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9850 // Note the last two are three-instruction sequences.
9851 if (SextVal >= -32 && SextVal <= 31) {
9852 // To avoid having these optimizations undone by constant folding,
9853 // we convert to a pseudo that will be expanded later into one of
9854 // the above forms.
9855 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9856 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9857 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9858 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9859 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9860 if (VT == Op.getValueType())
9861 return RetVal;
9862 else
9863 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9864 }
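// Worked example (editorial, not part of the original source): on a
// subtarget without P9Vector (P9 targets take the vextsb path above), a
// v4i32 splat of 24 lands here because 24 is even and in [-32,30] but
// outside [-16,15]; the VADD_SPLAT pseudo later expands to vspltisw 12
// followed by vadduwm, giving 12 + 12 == 24 in every element.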
9865
9866 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9867 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9868 // for fneg/fabs.
9869 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9870 // Make -1 and vspltisw -1:
9871 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9872
9873 // Make the VSLW intrinsic, computing 0x8000_0000.
9874 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9875 OnesV, DAG, dl);
9876
9877 // xor by OnesV to invert it.
9878 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9879 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9880 }
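// Worked example (editorial, not part of the original source): vspltisw -1
// makes every word 0xFFFFFFFF; vslw then shifts each word left by 31 (the
// shift count is the low 5 bits of the all-ones operand), producing
// 0x8000_0000; the final xor with the all-ones vector flips that into
// 0x7FFF_FFFF.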
9881
9882 // Check to see if this is a wide variety of vsplti*, binop self cases.
9883 static const signed char SplatCsts[] = {
9884 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9885 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9886 };
9887
9888 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9889 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9890 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9891 int i = SplatCsts[idx];
9892
9893 // Figure out what shift amount will be used by altivec if shifted by i in
9894 // this splat size.
9895 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9896
9897 // vsplti + shl self.
9898 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9899 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9900 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9901 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9902 Intrinsic::ppc_altivec_vslw
9903 };
9904 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9905 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9906 }
9907
9908 // vsplti + srl self.
9909 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9910 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9911 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9912 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9913 Intrinsic::ppc_altivec_vsrw
9914 };
9915 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9916 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9917 }
9918
9919 // vsplti + rol self.
9920 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9921 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9922 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9923 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9924 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9925 Intrinsic::ppc_altivec_vrlw
9926 };
9927 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9928 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9929 }
9930
9931 // t = vsplti c, result = vsldoi t, t, 1
9932 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9933 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9934 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9935 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9936 }
9937 // t = vsplti c, result = vsldoi t, t, 2
9938 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9939 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9940 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9941 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9942 }
9943 // t = vsplti c, result = vsldoi t, t, 3
9944 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9945 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9946 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9947 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9948 }
9949 }
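// Worked example (editorial, not part of the original source): a v16i8
// splat of 0x40 (SextVal == 64) matches the "vsplti + shl self" case with
// i == 4, since TypeShiftAmt == 4 and 4 << 4 == 64: we emit vspltisb 4
// followed by vslb of the result with itself.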
9950
9951 return SDValue();
9952}
9953
9954/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9955/// the specified operations to build the shuffle.
9956static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9957 SDValue RHS, SelectionDAG &DAG,
9958 const SDLoc &dl) {
9959 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9960 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9961 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9962
9963 enum {
9964 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9965 OP_VMRGHW,
9966 OP_VMRGLW,
9967 OP_VSPLTISW0,
9968 OP_VSPLTISW1,
9969 OP_VSPLTISW2,
9970 OP_VSPLTISW3,
9971 OP_VSLDOI4,
9972 OP_VSLDOI8,
9973 OP_VSLDOI12
9974 };
9975
9976 if (OpNum == OP_COPY) {
9977 if (LHSID == (1*9+2)*9+3) return LHS;
9978 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9979 return RHS;
9980 }
9981
9982 SDValue OpLHS, OpRHS;
9983 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9984 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9985
9986 int ShufIdxs[16];
9987 switch (OpNum) {
9988 default: llvm_unreachable("Unknown i32 permute!");
9989 case OP_VMRGHW:
9990 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9991 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9992 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9993 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9994 break;
9995 case OP_VMRGLW:
9996 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9997 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9998 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9999 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
10000 break;
10001 case OP_VSPLTISW0:
10002 for (unsigned i = 0; i != 16; ++i)
10003 ShufIdxs[i] = (i&3)+0;
10004 break;
10005 case OP_VSPLTISW1:
10006 for (unsigned i = 0; i != 16; ++i)
10007 ShufIdxs[i] = (i&3)+4;
10008 break;
10009 case OP_VSPLTISW2:
10010 for (unsigned i = 0; i != 16; ++i)
10011 ShufIdxs[i] = (i&3)+8;
10012 break;
10013 case OP_VSPLTISW3:
10014 for (unsigned i = 0; i != 16; ++i)
10015 ShufIdxs[i] = (i&3)+12;
10016 break;
10017 case OP_VSLDOI4:
10018 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
10019 case OP_VSLDOI8:
10020 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
10021 case OP_VSLDOI12:
10022 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
10023 }
10024 EVT VT = OpLHS.getValueType();
10025 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
10026 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
10027 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
10028 return DAG.getNode(ISD::BITCAST, dl, VT, T);
10029}
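// Editorial note on the encoding (illustrative, not part of the original
// source): each PFEntry packs the cost into bits 31:30, the opcode into
// bits 29:26, and two 13-bit operand IDs into bits 25:13 and 12:0. An ID is
// four base-9 digits (0-8, where 8 means undef), so ((0*9+1)*9+2)*9+3 == 102
// encodes the element order <0,1,2,3>, which is why OP_COPY compares LHSID
// against (1*9+2)*9+3 above.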
10030
10031/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
10032/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
10033/// SDValue.
10034SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
10035 SelectionDAG &DAG) const {
10036 const unsigned BytesInVector = 16;
10037 bool IsLE = Subtarget.isLittleEndian();
10038 SDLoc dl(N);
10039 SDValue V1 = N->getOperand(0);
10040 SDValue V2 = N->getOperand(1);
10041 unsigned ShiftElts = 0, InsertAtByte = 0;
10042 bool Swap = false;
10043
10044 // Shifts required to get the byte we want at element 7.
10045 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
10046 0, 15, 14, 13, 12, 11, 10, 9};
10047 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10048 1, 2, 3, 4, 5, 6, 7, 8};
10049
10050 ArrayRef<int> Mask = N->getMask();
10051 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10052
10053 // For each mask element, find out if we're just inserting something
10054 // from V2 into V1 or vice versa.
10055 // Possible permutations inserting an element from V2 into V1:
10056 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10057 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10058 // ...
10059 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10060 // Inserting from V1 into V2 will be similar, except mask range will be
10061 // [16,31].
10062
10063 bool FoundCandidate = false;
10064 // If both vector operands for the shuffle are the same vector, the mask
10065 // will contain only elements from the first one and the second one will be
10066 // undef.
10067 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10068 // Go through the mask of bytes to find an element that's being moved
10069 // from one vector to the other.
10070 for (unsigned i = 0; i < BytesInVector; ++i) {
10071 unsigned CurrentElement = Mask[i];
10072 // If 2nd operand is undefined, we should only look for element 7 in the
10073 // Mask.
10074 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10075 continue;
10076
10077 bool OtherElementsInOrder = true;
10078 // Examine the other elements in the Mask to see if they're in original
10079 // order.
10080 for (unsigned j = 0; j < BytesInVector; ++j) {
10081 if (j == i)
10082 continue;
10083 // If CurrentElement is from V1 [0,15], then we expect the rest of the
10084 // Mask to be from V2 [16,31] and vice versa, unless the 2nd operand is
10085 // undefined, in which case we assume we're always picking from the 1st operand.
10086 int MaskOffset =
10087 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10088 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10089 OtherElementsInOrder = false;
10090 break;
10091 }
10092 }
10093 // If other elements are in original order, we record the number of shifts
10094 // we need to get the element we want into element 7. Also record which byte
10095 // in the vector we should insert into.
10096 if (OtherElementsInOrder) {
10097 // If 2nd operand is undefined, we assume no shifts and no swapping.
10098 if (V2.isUndef()) {
10099 ShiftElts = 0;
10100 Swap = false;
10101 } else {
10102 // Only need the last 4 bits for the shift, because operands will be swapped if CurrentElement is >= 2^4.
10103 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10104 : BigEndianShifts[CurrentElement & 0xF];
10105 Swap = CurrentElement < BytesInVector;
10106 }
10107 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10108 FoundCandidate = true;
10109 break;
10110 }
10111 }
10112
10113 if (!FoundCandidate)
10114 return SDValue();
10115
10116 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10117 // optionally with VECSHL if shift is required.
10118 if (Swap)
10119 std::swap(V1, V2);
10120 if (V2.isUndef())
10121 V2 = V1;
10122 if (ShiftElts) {
10123 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10124 DAG.getConstant(ShiftElts, dl, MVT::i32));
10125 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10126 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10127 }
10128 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10129 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10130}
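// Worked example (editorial, not part of the original source): with both
// operands defined on a little-endian target, a byte mask equal to <0..15>
// except Mask[5] == 20 inserts element 4 of V2 at position 5. The loop
// finds CurrentElement == 20, so Swap stays false (20 >= 16), ShiftElts ==
// LittleEndianShifts[4] == 4, and InsertAtByte == 16 - (5 + 1) == 10,
// yielding a VECSHL of V2 followed by a VECINSERT (vinsertb) into V1.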
10131
10132/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10133/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10134/// SDValue.
10135SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10136 SelectionDAG &DAG) const {
10137 const unsigned NumHalfWords = 8;
10138 const unsigned BytesInVector = NumHalfWords * 2;
10139 // Check that the shuffle is on half-words.
10140 if (!isNByteElemShuffleMask(N, 2, 1))
10141 return SDValue();
10142
10143 bool IsLE = Subtarget.isLittleEndian();
10144 SDLoc dl(N);
10145 SDValue V1 = N->getOperand(0);
10146 SDValue V2 = N->getOperand(1);
10147 unsigned ShiftElts = 0, InsertAtByte = 0;
10148 bool Swap = false;
10149
10150 // Shifts required to get the half-word we want at element 3.
10151 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10152 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10153
10154 uint32_t Mask = 0;
10155 uint32_t OriginalOrderLow = 0x1234567;
10156 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10157 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10158 // 32-bit space, needing only a 4-bit nibble per element.
10159 for (unsigned i = 0; i < NumHalfWords; ++i) {
10160 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10161 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10162 }
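// For illustration (editorial, not part of the original source): the
// identity half-word shuffle <0,1,2,3,4,5,6,7> packs to Mask == 0x01234567,
// i.e. nibble i holds getMaskElt(2*i)/2, which matches OriginalOrderLow
// above.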
10163
10164 // For each mask element, find out if we're just inserting something
10165 // from V2 into V1 or vice versa. Possible permutations inserting an element
10166 // from V2 into V1:
10167 // X, 1, 2, 3, 4, 5, 6, 7
10168 // 0, X, 2, 3, 4, 5, 6, 7
10169 // 0, 1, X, 3, 4, 5, 6, 7
10170 // 0, 1, 2, X, 4, 5, 6, 7
10171 // 0, 1, 2, 3, X, 5, 6, 7
10172 // 0, 1, 2, 3, 4, X, 6, 7
10173 // 0, 1, 2, 3, 4, 5, X, 7
10174 // 0, 1, 2, 3, 4, 5, 6, X
10175 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10176
10177 bool FoundCandidate = false;
10178 // Go through the mask of half-words to find an element that's being moved
10179 // from one vector to the other.
10180 for (unsigned i = 0; i < NumHalfWords; ++i) {
10181 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10182 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10183 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10184 uint32_t TargetOrder = 0x0;
10185
10186 // If both vector operands for the shuffle are the same vector, the mask
10187 // will contain only elements from the first one and the second one will be
10188 // undef.
10189 if (V2.isUndef()) {
10190 ShiftElts = 0;
10191 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10192 TargetOrder = OriginalOrderLow;
10193 Swap = false;
10194 // Skip if this is not the correct element, or if the mask of the other
10195 // elements doesn't match our expected order.
10196 if (MaskOneElt == VINSERTHSrcElem &&
10197 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10198 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10199 FoundCandidate = true;
10200 break;
10201 }
10202 } else { // If both operands are defined.
10203 // Target order is [8,15] if the current mask is between [0,7].
10204 TargetOrder =
10205 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10206 // Skip if the mask of the other elements doesn't match our expected order.
10207 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10208 // We only need the last 3 bits for the number of shifts.
10209 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10210 : BigEndianShifts[MaskOneElt & 0x7];
10211 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10212 Swap = MaskOneElt < NumHalfWords;
10213 FoundCandidate = true;
10214 break;
10215 }
10216 }
10217 }
10218
10219 if (!FoundCandidate)
10220 return SDValue();
10221
10222 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10223 // optionally with VECSHL if shift is required.
10224 if (Swap)
10225 std::swap(V1, V2);
10226 if (V2.isUndef())
10227 V2 = V1;
10228 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10229 if (ShiftElts) {
10230 // Double ShiftElts because we're left shifting on v16i8 type.
10231 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10232 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10233 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10234 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10235 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10236 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10237 }
10238 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10239 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10240 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10241 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10242}
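// Worked example (editorial, not part of the original source): on a
// little-endian target with V2 undef, the half-word mask <0,1,2,4,4,5,6,7>
// moves element 4 (the LE VINSERTH source slot) into position 3 while all
// other elements stay in order, so the loop above picks InsertAtByte ==
// 16 - (3 + 1) * 2 == 8 with no shift, and the lowering emits a single
// VECINSERT (vinserth).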
10243
10244/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10245/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10246/// return the default SDValue.
10247SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10248 SelectionDAG &DAG) const {
10249 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10250 // to v16i8. Peek through the bitcasts to get the actual operands.
10251 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10252 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10253
10254 auto ShuffleMask = SVN->getMask();
10255 SDValue VecShuffle(SVN, 0);
10256 SDLoc DL(SVN);
10257
10258 // Check that we have a four byte shuffle.
10259 if (!isNByteElemShuffleMask(SVN, 4, 1))
10260 return SDValue();
10261
10262 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10263 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10264 std::swap(LHS, RHS);
10265 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10266 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10267 if (!CommutedSV)
10268 return SDValue();
10269 ShuffleMask = CommutedSV->getMask();
10270 }
10271
10272 // Ensure that the RHS is a vector of constants.
10273 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10274 if (!BVN)
10275 return SDValue();
10276
10277 // Check if RHS is a splat of 4-bytes (or smaller).
10278 APInt APSplatValue, APSplatUndef;
10279 unsigned SplatBitSize;
10280 bool HasAnyUndefs;
10281 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10282 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10283 SplatBitSize > 32)
10284 return SDValue();
10285
10286 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10287 // The instruction splats a constant C into two words of the source vector
10288 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10289 // Thus we check that the shuffle mask is the equivalent of
10290 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10291 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10292 // within each word are consecutive, so we only need to check the first byte.
10293 SDValue Index;
10294 bool IsLE = Subtarget.isLittleEndian();
10295 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10296 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10297 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10298 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10299 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10300 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10301 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10302 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10303 else
10304 return SDValue();
10305
10306 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10307 // for XXSPLTI32DX.
10308 unsigned SplatVal = APSplatValue.getZExtValue();
10309 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10310 SplatVal |= (SplatVal << SplatBitSize);
10311
10312 SDValue SplatNode = DAG.getNode(
10313 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10314 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10315 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10316}
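// Worked example (editorial, not part of the original source): on a
// little-endian target, the v4i32 word shuffle <0,5,2,7> (bytes
// <0..3,20..23,8..11,28..31>) keeps words 0 and 2 of the LHS and takes
// words 1 and 3 from the constant RHS, matching the
// { Unchanged, C, Unchanged, C } form, so Index becomes 0; a narrower
// splat constant is first replicated up to the 32-bit SplatVal.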
10317
10318/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10319/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10320/// a multiple of 8. Otherwise we convert it to a scalar rotation (i128),
10321/// i.e. (or (shl x, C1), (srl x, 128-C1)).
10322SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10323 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10324 assert(Op.getValueType() == MVT::v1i128 &&
10325 "Only set v1i128 as custom, other type shouldn't reach here!");
10326 SDLoc dl(Op);
10327 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10328 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10329 unsigned SHLAmt = N1.getConstantOperandVal(0);
10330 if (SHLAmt % 8 == 0) {
10331 std::array<int, 16> Mask;
10332 std::iota(Mask.begin(), Mask.end(), 0);
10333 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10334 if (SDValue Shuffle =
10335 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10336 DAG.getUNDEF(MVT::v16i8), Mask))
10337 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10338 }
10339 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10340 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10341 DAG.getConstant(SHLAmt, dl, MVT::i32));
10342 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10343 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10344 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10345 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10346}
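// Worked example (editorial, not part of the original source):
// ROTL(v1i128, 16) has SHLAmt % 8 == 0, so the mask starts as <0..15> and
// std::rotate turns it into <2,3,...,15,0,1>, i.e. a single byte shuffle.
// For SHLAmt == 5 the scalar path is used instead, emitting
// (or (shl x, 5), (srl x, 123)) on i128.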
10347
10348/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10349/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10350/// return the code it can be lowered into. Worst case, it can always be
10351/// lowered into a vperm.
10352SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10353 SelectionDAG &DAG) const {
10354 SDLoc dl(Op);
10355 SDValue V1 = Op.getOperand(0);
10356 SDValue V2 = Op.getOperand(1);
10357 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10358
10359 // Any nodes that were combined in the target-independent combiner prior
10360 // to vector legalization will not be sent to the target combine. Try to
10361 // combine it here.
10362 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10363 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10364 return NewShuffle;
10365 Op = NewShuffle;
10366 SVOp = cast<ShuffleVectorSDNode>(Op);
10367 V1 = Op.getOperand(0);
10368 V2 = Op.getOperand(1);
10369 }
10370 EVT VT = Op.getValueType();
10371 bool isLittleEndian = Subtarget.isLittleEndian();
10372
10373 unsigned ShiftElts, InsertAtByte;
10374 bool Swap = false;
10375
10376 // If this is a load-and-splat, we can do that with a single instruction
10377 // in some cases. However if the load has multiple uses, we don't want to
10378 // combine it because that will just produce multiple loads.
10379 bool IsPermutedLoad = false;
10380 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10381 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10382 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10383 InputLoad->hasOneUse()) {
10384 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10385 int SplatIdx =
10386 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10387
10388 // The splat index for permuted loads will be in the left half of the vector
10389 // which is strictly wider than the loaded value by 8 bytes. So we need to
10390 // adjust the splat index to point to the correct address in memory.
10391 if (IsPermutedLoad) {
10392 assert((isLittleEndian || IsFourByte) &&
10393 "Unexpected size for permuted load on big endian target");
10394 SplatIdx += IsFourByte ? 2 : 1;
10395 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10396 "Splat of a value outside of the loaded memory");
10397 }
10398
10399 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10400 // For 4-byte load-and-splat, we need Power9.
10401 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10402 uint64_t Offset = 0;
10403 if (IsFourByte)
10404 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10405 else
10406 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10407
10408 // If the width of the load is the same as the width of the splat,
10409 // loading with an offset would load the wrong memory.
10410 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10411 Offset = 0;
10412
10413 SDValue BasePtr = LD->getBasePtr();
10414 if (Offset != 0)
10415 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10416 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10417 SDValue Ops[] = {
10418 LD->getChain(), // Chain
10419 BasePtr, // BasePtr
10420 DAG.getValueType(Op.getValueType()) // VT
10421 };
10422 SDVTList VTL =
10423 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10424 SDValue LdSplt =
10425 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10426 Ops, LD->getMemoryVT(), LD->getMemOperand());
10427 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10428 if (LdSplt.getValueType() != SVOp->getValueType(0))
10429 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10430 return LdSplt;
10431 }
10432 }
10433
10434 // All v2i64 and v2f64 shuffles are legal
10435 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10436 return Op;
10437
10438 if (Subtarget.hasP9Vector() &&
10439 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10440 isLittleEndian)) {
10441 if (V2.isUndef())
10442 V2 = V1;
10443 else if (Swap)
10444 std::swap(V1, V2);
10445 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10446 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10447 if (ShiftElts) {
10448 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10449 DAG.getConstant(ShiftElts, dl, MVT::i32));
10450 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10451 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10452 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10453 }
10454 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10455 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10456 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10457 }
10458
10459 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10460 SDValue SplatInsertNode;
10461 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10462 return SplatInsertNode;
10463 }
10464
10465 if (Subtarget.hasP9Altivec()) {
10466 SDValue NewISDNode;
10467 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10468 return NewISDNode;
10469
10470 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10471 return NewISDNode;
10472 }
10473
10474 if (Subtarget.hasVSX() &&
10475 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10476 if (Swap)
10477 std::swap(V1, V2);
10478 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10479 SDValue Conv2 =
10480 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10481
10482 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10483 DAG.getConstant(ShiftElts, dl, MVT::i32));
10484 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10485 }
10486
10487 if (Subtarget.hasVSX() &&
10488 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10489 if (Swap)
10490 std::swap(V1, V2);
10491 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10492 SDValue Conv2 =
10493 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10494
10495 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10496 DAG.getConstant(ShiftElts, dl, MVT::i32));
10497 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10498 }
10499
10500 if (Subtarget.hasP9Vector()) {
10501 if (PPC::isXXBRHShuffleMask(SVOp)) {
10502 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10503 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10504 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10505 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10506 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10507 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10508 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10509 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10510 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10511 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10512 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10513 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10514 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10515 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10516 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10517 }
10518 }
10519
10520 if (Subtarget.hasVSX()) {
10521 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10522 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10523
10524 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10525 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10526 DAG.getConstant(SplatIdx, dl, MVT::i32));
10527 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10528 }
10529
10530 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10531 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10532 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10533 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10534 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10535 }
10536 }
10537
10538 // Cases that are handled by instructions that take permute immediates
10539 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10540 // selected by the instruction selector.
10541 if (V2.isUndef()) {
10542 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10543 PPC::isSplatShuffleMask(SVOp, 2) ||
10544 PPC::isSplatShuffleMask(SVOp, 4) ||
10545 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10546 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10547 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10548 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10549 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10550 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10551 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10552 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10553 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10554 (Subtarget.hasP8Altivec() && (
10555 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10556 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10557 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10558 return Op;
10559 }
10560 }
10561
10562 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10563 // and produce a fixed permutation. If any of these match, do not lower to
10564 // VPERM.
10565 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10566 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10567 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10568 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10569 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10570 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10571 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10572 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10573 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10574 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10575 (Subtarget.hasP8Altivec() && (
10576 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10577 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10578 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10579 return Op;
10580
10581 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10582 // perfect shuffle table to emit an optimal matching sequence.
10583 ArrayRef<int> PermMask = SVOp->getMask();
10584
10585 if (!DisablePerfectShuffle && !isLittleEndian) {
10586 unsigned PFIndexes[4];
10587 bool isFourElementShuffle = true;
10588 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10589 ++i) { // Element number
10590 unsigned EltNo = 8; // Start out undef.
10591 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10592 if (PermMask[i * 4 + j] < 0)
10593 continue; // Undef, ignore it.
10594
10595 unsigned ByteSource = PermMask[i * 4 + j];
10596 if ((ByteSource & 3) != j) {
10597 isFourElementShuffle = false;
10598 break;
10599 }
10600
10601 if (EltNo == 8) {
10602 EltNo = ByteSource / 4;
10603 } else if (EltNo != ByteSource / 4) {
10604 isFourElementShuffle = false;
10605 break;
10606 }
10607 }
10608 PFIndexes[i] = EltNo;
10609 }
10610
10611 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10612 // perfect shuffle vector to determine if it is cost effective to do this as
10613 // discrete instructions, or whether we should use a vperm.
10614 // For now, we skip this for little endian until such time as we have a
10615 // little-endian perfect shuffle table.
10616 if (isFourElementShuffle) {
10617 // Compute the index in the perfect shuffle table.
10618 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10619 PFIndexes[2] * 9 + PFIndexes[3];
10620
10621 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10622 unsigned Cost = (PFEntry >> 30);
10623
10624 // Determining when to avoid vperm is tricky. Many things affect the cost
10625 // of vperm, particularly how many times the perm mask needs to be
10626 // computed. For example, if the perm mask can be hoisted out of a loop or
10627 // is already used (perhaps because there are multiple permutes with the
10628 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10629 // permute mask out of the loop requires an extra register.
10630 //
10631 // As a compromise, we only emit discrete instructions if the shuffle can
10632 // be generated in 3 or fewer operations. When we have loop information
10633 // available, if this block is within a loop, we should avoid using vperm
10634 // for 3-operation perms and use a constant pool load instead.
10635 if (Cost < 3)
10636 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10637 }
10638 }
10639
10640 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10641 // vector that will get spilled to the constant pool.
10642 if (V2.isUndef()) V2 = V1;
10643
10644 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10645}
10646
10647SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10648 ArrayRef<int> PermMask, EVT VT,
10649 SDValue V1, SDValue V2) const {
10650 unsigned Opcode = PPCISD::VPERM;
10651 EVT ValType = V1.getValueType();
10652 SDLoc dl(Op);
10653 bool NeedSwap = false;
10654 bool isLittleEndian = Subtarget.isLittleEndian();
10655 bool isPPC64 = Subtarget.isPPC64();
10656
10657 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10658 (V1->hasOneUse() || V2->hasOneUse())) {
10659 LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - "
10660 "using XXPERM instead\n");
10661 Opcode = PPCISD::XXPERM;
10662
10663 // The second input to XXPERM is also an output so if the second input has
10664 // multiple uses then copying is necessary, as a result we want the
10665 // single-use operand to be used as the second input to prevent copying.
10666 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10667 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10668 std::swap(V1, V2);
10669 NeedSwap = !NeedSwap;
10670 }
10671 }
10672
10673 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10674 // that it is in input element units, not in bytes. Convert now.
10675
10676 // For little endian, the order of the input vectors is reversed, and
10677 // the permutation mask is complemented with respect to 31. This is
10678 // necessary to produce proper semantics with the big-endian-based vperm
10679 // instruction.
10680 EVT EltVT = V1.getValueType().getVectorElementType();
10681 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10682
10683 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10684 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10685
10686 /*
10687 Vectors will be appended like so: [ V1 | V2 ]
10688 XXSWAPD on V1:
10689 [ A | B | C | D ] -> [ C | D | A | B ]
10690 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10691 i.e. index of A, B += 8, and index of C, D -= 8.
10692 XXSWAPD on V2:
10693 [ E | F | G | H ] -> [ G | H | E | F ]
10694 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10695 i.e. index of E, F += 8, index of G, H -= 8
10696 Swap V1 and V2:
10697 [ V1 | V2 ] -> [ V2 | V1 ]
10698 0-15 16-31 0-15 16-31
10699 i.e. index of V1 += 16, index of V2 -= 16
10700 */
10701
10702 SmallVector<SDValue, 16> ResultMask;
10703 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10704 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10705
10706 if (V1HasXXSWAPD) {
10707 if (SrcElt < 8)
10708 SrcElt += 8;
10709 else if (SrcElt < 16)
10710 SrcElt -= 8;
10711 }
10712 if (V2HasXXSWAPD) {
10713 if (SrcElt > 23)
10714 SrcElt -= 8;
10715 else if (SrcElt > 15)
10716 SrcElt += 8;
10717 }
10718 if (NeedSwap) {
10719 if (SrcElt < 16)
10720 SrcElt += 16;
10721 else
10722 SrcElt -= 16;
10723 }
10724 for (unsigned j = 0; j != BytesPerElement; ++j)
10725 if (isLittleEndian)
10726 ResultMask.push_back(
10727 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10728 else
10729 ResultMask.push_back(
10730 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10731 }
10732
10733 if (V1HasXXSWAPD) {
10734 dl = SDLoc(V1->getOperand(0));
10735 V1 = V1->getOperand(0)->getOperand(1);
10736 }
10737 if (V2HasXXSWAPD) {
10738 dl = SDLoc(V2->getOperand(0));
10739 V2 = V2->getOperand(0)->getOperand(1);
10740 }
10741
10742 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10743 if (ValType != MVT::v2f64)
10744 V1 = DAG.getBitcast(MVT::v2f64, V1);
10745 if (V2.getValueType() != MVT::v2f64)
10746 V2 = DAG.getBitcast(MVT::v2f64, V2);
10747 }
10748
10749 ShufflesHandledWithVPERM++;
10750 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10751 LLVM_DEBUG({
10752 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10753 if (Opcode == PPCISD::XXPERM) {
10754 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10755 } else {
10756 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10757 }
10758 SVOp->dump();
10759 dbgs() << "With the following permute control vector:\n";
10760 VPermMask.dump();
10761 });
10762
10763 if (Opcode == PPCISD::XXPERM)
10764 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10765
10766 // For little endian we only need to swap the operands here;
10767 // the permute mask was already calculated accordingly.
10768 if (isLittleEndian)
10769 std::swap(V1, V2);
10770
10771 SDValue VPERMNode =
10772 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10773
10774 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10775 return VPERMNode;
10776}
10777
10778/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10779/// vector comparison. If it is, return true and fill in Opc/isDot with
10780/// information about the intrinsic.
10781static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10782 bool &isDot, const PPCSubtarget &Subtarget) {
10783 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10784 CompareOpc = -1;
10785 isDot = false;
10786 switch (IntrinsicID) {
10787 default:
10788 return false;
10789 // Comparison predicates.
10790 case Intrinsic::ppc_altivec_vcmpbfp_p:
10791 CompareOpc = 966;
10792 isDot = true;
10793 break;
10794 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10795 CompareOpc = 198;
10796 isDot = true;
10797 break;
10798 case Intrinsic::ppc_altivec_vcmpequb_p:
10799 CompareOpc = 6;
10800 isDot = true;
10801 break;
10802 case Intrinsic::ppc_altivec_vcmpequh_p:
10803 CompareOpc = 70;
10804 isDot = true;
10805 break;
10806 case Intrinsic::ppc_altivec_vcmpequw_p:
10807 CompareOpc = 134;
10808 isDot = true;
10809 break;
10810 case Intrinsic::ppc_altivec_vcmpequd_p:
10811 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10812 CompareOpc = 199;
10813 isDot = true;
10814 } else
10815 return false;
10816 break;
10817 case Intrinsic::ppc_altivec_vcmpneb_p:
10818 case Intrinsic::ppc_altivec_vcmpneh_p:
10819 case Intrinsic::ppc_altivec_vcmpnew_p:
10820 case Intrinsic::ppc_altivec_vcmpnezb_p:
10821 case Intrinsic::ppc_altivec_vcmpnezh_p:
10822 case Intrinsic::ppc_altivec_vcmpnezw_p:
10823 if (Subtarget.hasP9Altivec()) {
10824 switch (IntrinsicID) {
10825 default:
10826 llvm_unreachable("Unknown comparison intrinsic.");
10827 case Intrinsic::ppc_altivec_vcmpneb_p:
10828 CompareOpc = 7;
10829 break;
10830 case Intrinsic::ppc_altivec_vcmpneh_p:
10831 CompareOpc = 71;
10832 break;
10833 case Intrinsic::ppc_altivec_vcmpnew_p:
10834 CompareOpc = 135;
10835 break;
10836 case Intrinsic::ppc_altivec_vcmpnezb_p:
10837 CompareOpc = 263;
10838 break;
10839 case Intrinsic::ppc_altivec_vcmpnezh_p:
10840 CompareOpc = 327;
10841 break;
10842 case Intrinsic::ppc_altivec_vcmpnezw_p:
10843 CompareOpc = 391;
10844 break;
10845 }
10846 isDot = true;
10847 } else
10848 return false;
10849 break;
10850 case Intrinsic::ppc_altivec_vcmpgefp_p:
10851 CompareOpc = 454;
10852 isDot = true;
10853 break;
10854 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10855 CompareOpc = 710;
10856 isDot = true;
10857 break;
10858 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10859 CompareOpc = 774;
10860 isDot = true;
10861 break;
10862 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10863 CompareOpc = 838;
10864 isDot = true;
10865 break;
10866 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10867 CompareOpc = 902;
10868 isDot = true;
10869 break;
10870 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10871 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10872 CompareOpc = 967;
10873 isDot = true;
10874 } else
10875 return false;
10876 break;
10877 case Intrinsic::ppc_altivec_vcmpgtub_p:
10878 CompareOpc = 518;
10879 isDot = true;
10880 break;
10881 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10882 CompareOpc = 582;
10883 isDot = true;
10884 break;
10885 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10886 CompareOpc = 646;
10887 isDot = true;
10888 break;
10889 case Intrinsic::ppc_altivec_vcmpgtud_p:
10890 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10891 CompareOpc = 711;
10892 isDot = true;
10893 } else
10894 return false;
10895 break;
10896
10897 case Intrinsic::ppc_altivec_vcmpequq:
10898 case Intrinsic::ppc_altivec_vcmpgtsq:
10899 case Intrinsic::ppc_altivec_vcmpgtuq:
10900 if (!Subtarget.isISA3_1())
10901 return false;
10902 switch (IntrinsicID) {
10903 default:
10904 llvm_unreachable("Unknown comparison intrinsic.");
10905 case Intrinsic::ppc_altivec_vcmpequq:
10906 CompareOpc = 455;
10907 break;
10908 case Intrinsic::ppc_altivec_vcmpgtsq:
10909 CompareOpc = 903;
10910 break;
10911 case Intrinsic::ppc_altivec_vcmpgtuq:
10912 CompareOpc = 647;
10913 break;
10914 }
10915 break;
10916
10917 // VSX predicate comparisons use the same infrastructure
10918 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10919 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10920 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10921 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10922 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10923 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10924 if (Subtarget.hasVSX()) {
10925 switch (IntrinsicID) {
10926 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10927 CompareOpc = 99;
10928 break;
10929 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10930 CompareOpc = 115;
10931 break;
10932 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10933 CompareOpc = 107;
10934 break;
10935 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10936 CompareOpc = 67;
10937 break;
10938 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10939 CompareOpc = 83;
10940 break;
10941 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10942 CompareOpc = 75;
10943 break;
10944 }
10945 isDot = true;
10946 } else
10947 return false;
10948 break;
10949
10950 // Normal Comparisons.
10951 case Intrinsic::ppc_altivec_vcmpbfp:
10952 CompareOpc = 966;
10953 break;
10954 case Intrinsic::ppc_altivec_vcmpeqfp:
10955 CompareOpc = 198;
10956 break;
10957 case Intrinsic::ppc_altivec_vcmpequb:
10958 CompareOpc = 6;
10959 break;
10960 case Intrinsic::ppc_altivec_vcmpequh:
10961 CompareOpc = 70;
10962 break;
10963 case Intrinsic::ppc_altivec_vcmpequw:
10964 CompareOpc = 134;
10965 break;
10966 case Intrinsic::ppc_altivec_vcmpequd:
10967 if (Subtarget.hasP8Altivec())
10968 CompareOpc = 199;
10969 else
10970 return false;
10971 break;
10972 case Intrinsic::ppc_altivec_vcmpneb:
10973 case Intrinsic::ppc_altivec_vcmpneh:
10974 case Intrinsic::ppc_altivec_vcmpnew:
10975 case Intrinsic::ppc_altivec_vcmpnezb:
10976 case Intrinsic::ppc_altivec_vcmpnezh:
10977 case Intrinsic::ppc_altivec_vcmpnezw:
10978 if (Subtarget.hasP9Altivec())
10979 switch (IntrinsicID) {
10980 default:
10981 llvm_unreachable("Unknown comparison intrinsic.");
10982 case Intrinsic::ppc_altivec_vcmpneb:
10983 CompareOpc = 7;
10984 break;
10985 case Intrinsic::ppc_altivec_vcmpneh:
10986 CompareOpc = 71;
10987 break;
10988 case Intrinsic::ppc_altivec_vcmpnew:
10989 CompareOpc = 135;
10990 break;
10991 case Intrinsic::ppc_altivec_vcmpnezb:
10992 CompareOpc = 263;
10993 break;
10994 case Intrinsic::ppc_altivec_vcmpnezh:
10995 CompareOpc = 327;
10996 break;
10997 case Intrinsic::ppc_altivec_vcmpnezw:
10998 CompareOpc = 391;
10999 break;
11000 }
11001 else
11002 return false;
11003 break;
11004 case Intrinsic::ppc_altivec_vcmpgefp:
11005 CompareOpc = 454;
11006 break;
11007 case Intrinsic::ppc_altivec_vcmpgtfp:
11008 CompareOpc = 710;
11009 break;
11010 case Intrinsic::ppc_altivec_vcmpgtsb:
11011 CompareOpc = 774;
11012 break;
11013 case Intrinsic::ppc_altivec_vcmpgtsh:
11014 CompareOpc = 838;
11015 break;
11016 case Intrinsic::ppc_altivec_vcmpgtsw:
11017 CompareOpc = 902;
11018 break;
11019 case Intrinsic::ppc_altivec_vcmpgtsd:
11020 if (Subtarget.hasP8Altivec())
11021 CompareOpc = 967;
11022 else
11023 return false;
11024 break;
11025 case Intrinsic::ppc_altivec_vcmpgtub:
11026 CompareOpc = 518;
11027 break;
11028 case Intrinsic::ppc_altivec_vcmpgtuh:
11029 CompareOpc = 582;
11030 break;
11031 case Intrinsic::ppc_altivec_vcmpgtuw:
11032 CompareOpc = 646;
11033 break;
11034 case Intrinsic::ppc_altivec_vcmpgtud:
11035 if (Subtarget.hasP8Altivec())
11036 CompareOpc = 711;
11037 else
11038 return false;
11039 break;
11040 case Intrinsic::ppc_altivec_vcmpequq_p:
11041 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11042 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11043 if (!Subtarget.isISA3_1())
11044 return false;
11045 switch (IntrinsicID) {
11046 default:
11047 llvm_unreachable("Unknown comparison intrinsic.");
11048 case Intrinsic::ppc_altivec_vcmpequq_p:
11049 CompareOpc = 455;
11050 break;
11051 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11052 CompareOpc = 903;
11053 break;
11054 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11055 CompareOpc = 647;
11056 break;
11057 }
11058 isDot = true;
11059 break;
11060 }
11061 return true;
11062}
11063
11064/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11065/// lower, do it, otherwise return null.
11066SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11067 SelectionDAG &DAG) const {
11068 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11069
11070 SDLoc dl(Op);
11071 // Note: BCD instructions expect the immediate operand in vector form (v4i32),
11072 // but the builtin provides it as a scalar. To satisfy the instruction
11073 // encoding, we splat the scalar across all lanes using SPLAT_VECTOR.
11074 auto MapNodeWithSplatVector =
11075 [&](unsigned Opcode,
11076 std::initializer_list<SDValue> ExtraOps = {}) -> SDValue {
11077 SDValue SplatVal =
11078 DAG.getNode(ISD::SPLAT_VECTOR, dl, MVT::v4i32, Op.getOperand(2));
11079
11080 SmallVector<SDValue, 4> Ops{SplatVal, Op.getOperand(1)};
11081 Ops.append(ExtraOps.begin(), ExtraOps.end());
11082 return DAG.getNode(Opcode, dl, MVT::v16i8, Ops);
11083 };
11084
11085 switch (IntrinsicID) {
11086 case Intrinsic::thread_pointer:
11087 // Reads the thread pointer register, used for __builtin_thread_pointer.
11088 if (Subtarget.isPPC64())
11089 return DAG.getRegister(PPC::X13, MVT::i64);
11090 return DAG.getRegister(PPC::R2, MVT::i32);
11091
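// Illustrative example (values assumed): for mask 0x00000000FFFF0000,
// isRunOfOnes64 gives MB = 32 and ME = 47 in big-endian bit numbering. With
// SH = 8, ME < 63 - SH, so Src is pre-rotated left by ME + SH + 1 = 56; the
// RLDIMI emitted below rotates by a further 63 - ME = 16, for a net rotation
// of 72 mod 64 = 8, the shift the intrinsic asked for.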
11092 case Intrinsic::ppc_rldimi: {
11093 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11094 SDValue Src = Op.getOperand(1);
11095 APInt Mask = Op.getConstantOperandAPInt(4);
11096 if (Mask.isZero())
11097 return Op.getOperand(2);
11098 if (Mask.isAllOnes())
11099 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11100 uint64_t SH = Op.getConstantOperandVal(3);
11101 unsigned MB = 0, ME = 0;
11102 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11103 report_fatal_error("invalid rldimi mask!");
11104 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11105 if (ME < 63 - SH) {
11106 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11107 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11108 } else if (ME > 63 - SH) {
11109 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11110 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11111 }
11112 return SDValue(
11113 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11114 {Op.getOperand(2), Src,
11115 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11116 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11117 0);
11118 }
11119
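// Illustrative example: mask 0x00FFFF00 is a contiguous run of ones with
// MB = 8 and ME = 23 (big-endian bit numbering), so the intrinsic maps onto
// a single RLWIMI using the rotate amount unchanged.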
11120 case Intrinsic::ppc_rlwimi: {
11121 APInt Mask = Op.getConstantOperandAPInt(4);
11122 if (Mask.isZero())
11123 return Op.getOperand(2);
11124 if (Mask.isAllOnes())
11125 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11126 Op.getOperand(3));
11127 unsigned MB = 0, ME = 0;
11128 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11129 report_fatal_error("invalid rlwimi mask!");
11130 return SDValue(DAG.getMachineNode(
11131 PPC::RLWIMI, dl, MVT::i32,
11132 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11133 DAG.getTargetConstant(MB, dl, MVT::i32),
11134 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11135 0);
11136 }
11137
11138 case Intrinsic::ppc_bcdshift:
11139 return MapNodeWithSplatVector(PPCISD::BCDSHIFT, {Op.getOperand(3)});
11140 case Intrinsic::ppc_bcdshiftround:
11141 return MapNodeWithSplatVector(PPCISD::BCDSHIFTROUND, {Op.getOperand(3)});
11142 case Intrinsic::ppc_bcdtruncate:
11143 return MapNodeWithSplatVector(PPCISD::BCDTRUNC, {Op.getOperand(3)});
11144 case Intrinsic::ppc_bcdunsignedtruncate:
11145 return MapNodeWithSplatVector(PPCISD::BCDUTRUNC);
11146 case Intrinsic::ppc_bcdunsignedshift:
11147 return MapNodeWithSplatVector(PPCISD::BCDUSHIFT);
11148
11149 case Intrinsic::ppc_rlwnm: {
11150 if (Op.getConstantOperandVal(3) == 0)
11151 return DAG.getConstant(0, dl, MVT::i32);
11152 unsigned MB = 0, ME = 0;
11153 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11154 report_fatal_error("invalid rlwnm mask!");
11155 return SDValue(
11156 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11157 {Op.getOperand(1), Op.getOperand(2),
11158 DAG.getTargetConstant(MB, dl, MVT::i32),
11159 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11160 0);
11161 }
11162
11163 case Intrinsic::ppc_mma_disassemble_acc: {
11164 if (Subtarget.isISAFuture()) {
11165 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11166 SDValue WideVec =
11167 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11168 Op.getOperand(1)),
11169 0);
11170 SmallVector<SDValue, 4> RetOps;
11171 SDValue Value = SDValue(WideVec.getNode(), 0);
11172 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11173
11174 SDValue Extract;
11175 Extract = DAG.getNode(
11176 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11177 Subtarget.isLittleEndian() ? Value2 : Value,
11178 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11179 dl, getPointerTy(DAG.getDataLayout())));
11180 RetOps.push_back(Extract);
11181 Extract = DAG.getNode(
11182 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11183 Subtarget.isLittleEndian() ? Value2 : Value,
11184 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11185 dl, getPointerTy(DAG.getDataLayout())));
11186 RetOps.push_back(Extract);
11187 Extract = DAG.getNode(
11188 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11189 Subtarget.isLittleEndian() ? Value : Value2,
11190 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11191 dl, getPointerTy(DAG.getDataLayout())));
11192 RetOps.push_back(Extract);
11193 Extract = DAG.getNode(
11194 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11195 Subtarget.isLittleEndian() ? Value : Value2,
11196 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11197 dl, getPointerTy(DAG.getDataLayout())));
11198 RetOps.push_back(Extract);
11199 return DAG.getMergeValues(RetOps, dl);
11200 }
11201 [[fallthrough]];
11202 }
11203 case Intrinsic::ppc_vsx_disassemble_pair: {
11204 int NumVecs = 2;
11205 SDValue WideVec = Op.getOperand(1);
11206 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11207 NumVecs = 4;
11208 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11209 }
11210 SmallVector<SDValue, 4> RetOps;
11211 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11212 SDValue Extract = DAG.getNode(
11213 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11214 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11215 : VecNo,
11216 dl, getPointerTy(DAG.getDataLayout())));
11217 RetOps.push_back(Extract);
11218 }
11219 return DAG.getMergeValues(RetOps, dl);
11220 }
11221
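// Assemble a 1024-bit DMR value from eight 128-bit vector operands: adjacent
// operands are combined into v256i1 pairs, the chains of any operands that
// are loads are merged into a TokenFactor to preserve memory ordering, and
// DMFInsert1024 builds the full register from the four pairs.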
11222 case Intrinsic::ppc_build_dmr: {
11223 SmallVector<SDValue, 4> Pairs;
11224 SmallVector<SDValue, 8> Chains;
11225 for (int i = 1; i < 9; i += 2) {
11226 SDValue Hi = Op.getOperand(i);
11227 SDValue Lo = Op.getOperand(i + 1);
11228 if (Hi->getOpcode() == ISD::LOAD)
11229 Chains.push_back(Hi.getValue(1));
11230 if (Lo->getOpcode() == ISD::LOAD)
11231 Chains.push_back(Lo.getValue(1));
11232 Pairs.push_back(
11233 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11234 }
11235 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11236 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11237 return DAG.getMergeValues({Value, TF}, dl);
11238 }
11239
11240 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11241 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11242 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11243 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11244 "Specify P of 0 or 1 for lower or upper 512 bytes");
11245 unsigned HiLo = Idx->getSExtValue();
11246 unsigned Opcode;
11247 unsigned Subx;
11248 if (HiLo == 0) {
11249 Opcode = PPC::DMXXEXTFDMR512;
11250 Subx = PPC::sub_wacc_lo;
11251 } else {
11252 Opcode = PPC::DMXXEXTFDMR512_HI;
11253 Subx = PPC::sub_wacc_hi;
11254 }
11255 SDValue Subreg(
11256 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11257 Op.getOperand(1),
11258 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11259 0);
11260 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11261 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11262 }
11263
11264 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11265 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11266 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11267 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11268 "Specify a dmr row pair 0-3");
11269 unsigned IdxVal = Idx->getSExtValue();
11270 unsigned Subx;
11271 switch (IdxVal) {
11272 case 0:
11273 Subx = PPC::sub_dmrrowp0;
11274 break;
11275 case 1:
11276 Subx = PPC::sub_dmrrowp1;
11277 break;
11278 case 2:
11279 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11280 break;
11281 case 3:
11282 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11283 break;
11284 }
11285 SDValue Subreg(
11286 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11287 Op.getOperand(1),
11288 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11289 0);
11290 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11291 return SDValue(
11292 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11293 0);
11294 }
11295
11296 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11297 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11298 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11299 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11300 "Specify P of 0 or 1 for lower or upper 512 bytes");
11301 unsigned HiLo = Idx->getSExtValue();
11302 unsigned Opcode;
11303 unsigned Subx;
11304 if (HiLo == 0) {
11305 Opcode = PPCISD::INST512;
11306 Subx = PPC::sub_wacc_lo;
11307 } else {
11308 Opcode = PPCISD::INST512HI;
11309 Subx = PPC::sub_wacc_hi;
11310 }
11311 SDValue Wacc = DAG.getNode(Opcode, dl, MVT::v512i1, Op.getOperand(2),
11312 Op.getOperand(3));
11313 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11314 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11315 Op.getOperand(1), Wacc, SubReg),
11316 0);
11317 }
11318
11319 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11320 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11321 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11322 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11323 "Specify a dmr row pair 0-3");
11324 unsigned IdxVal = Idx->getSExtValue();
11325 unsigned Subx;
11326 switch (IdxVal) {
11327 case 0:
11328 Subx = PPC::sub_dmrrowp0;
11329 break;
11330 case 1:
11331 Subx = PPC::sub_dmrrowp1;
11332 break;
11333 case 2:
11334 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11335 break;
11336 case 3:
11337 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11338 break;
11339 }
11340 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11341 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11342 SDValue DMRRowp =
11343 DAG.getNode(PPCISD::INST256, dl, MVT::v256i1, Op.getOperand(2), P);
11344 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11345 Op.getOperand(1), DMRRowp, SubReg),
11346 0);
11347 }
11348
11349 case Intrinsic::ppc_mma_xxmfacc:
11350 case Intrinsic::ppc_mma_xxmtacc: {
11351 // Allow pre-isa-future subtargets to lower as normal.
11352 if (!Subtarget.isISAFuture())
11353 return SDValue();
11354 // The intrinsics for xxmtacc and xxmfacc take one argument of
11355 // type v512i1, for future cpu the corresponding wacc instruction
11356 // dmxx[inst|extf]dmr512 is always generated for type v512i1, obviating
11357 // the need to produce the xxm[t|f]acc.
11358 SDValue WideVec = Op.getOperand(1);
11359 DAG.ReplaceAllUsesWith(Op, WideVec);
11360 return SDValue();
11361 }
11362
11363 case Intrinsic::ppc_unpack_longdouble: {
11364 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11365 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11366 "Argument of long double unpack must be 0 or 1!");
11367 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11368 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11369 Idx->getValueType(0)));
11370 }
11371
11372 case Intrinsic::ppc_compare_exp_lt:
11373 case Intrinsic::ppc_compare_exp_gt:
11374 case Intrinsic::ppc_compare_exp_eq:
11375 case Intrinsic::ppc_compare_exp_uo: {
11376 unsigned Pred;
11377 switch (IntrinsicID) {
11378 case Intrinsic::ppc_compare_exp_lt:
11379 Pred = PPC::PRED_LT;
11380 break;
11381 case Intrinsic::ppc_compare_exp_gt:
11382 Pred = PPC::PRED_GT;
11383 break;
11384 case Intrinsic::ppc_compare_exp_eq:
11385 Pred = PPC::PRED_EQ;
11386 break;
11387 case Intrinsic::ppc_compare_exp_uo:
11388 Pred = PPC::PRED_UN;
11389 break;
11390 }
11391 return SDValue(
11392 DAG.getMachineNode(
11393 PPC::SELECT_CC_I4, dl, MVT::i32,
11394 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11395 Op.getOperand(1), Op.getOperand(2)),
11396 0),
11397 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11398 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11399 0);
11400 }
11401 case Intrinsic::ppc_test_data_class: {
11402 EVT OpVT = Op.getOperand(1).getValueType();
11403 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11404 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11405 : PPC::XSTSTDCSP);
11406 // Lower __builtin_ppc_test_data_class(value, mask) to XSTSTDC* instruction.
11407 // The XSTSTDC* instructions test if a floating-point value matches any of
11408 // the data classes specified in the mask, setting CR field bits
11409 // accordingly. We need to extract the EQ bit (bit 2) from the CR field and
11410 // convert it to an integer result (1 if match, 0 if no match).
11411 //
11412 // Note: Operands are swapped because XSTSTDC* expects (mask, value) but the
11413 // intrinsic provides (value, mask) as Op.getOperand(1) and
11414 // Op.getOperand(2).
11415 SDValue TestDataClass =
11416 SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32,
11417 {Op.getOperand(2), Op.getOperand(1)}),
11418 0);
11419 if (Subtarget.isISA3_1()) {
11420 // ISA 3.1+: Use SETBC instruction to directly convert CR bit to integer.
11421 // This is more efficient than the SELECT_CC approach used in earlier
11422 // ISAs.
11423 SDValue SubRegIdx = DAG.getTargetConstant(PPC::sub_eq, dl, MVT::i32);
11424 SDValue CRBit =
11425 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11426 TestDataClass, SubRegIdx),
11427 0);
11428
11429 return DAG.getNode(PPCISD::SETBC, dl, MVT::i32, CRBit);
11430 }
11431
11432 // Pre-ISA 3.1: Use SELECT_CC to convert CR field to integer (1 or 0).
11433 return SDValue(
11434 DAG.getMachineNode(PPC::SELECT_CC_I4, dl, MVT::i32,
11435 {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
11436 DAG.getConstant(0, dl, MVT::i32),
11437 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11438 0);
11439 }
11440 case Intrinsic::ppc_fnmsub: {
11441 EVT VT = Op.getOperand(1).getValueType();
11442 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11443 return DAG.getNode(
11444 ISD::FNEG, dl, VT,
11445 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11446 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11447 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11448 Op.getOperand(2), Op.getOperand(3));
11449 }
11450 case Intrinsic::ppc_convert_f128_to_ppcf128:
11451 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11452 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11453 ? RTLIB::CONVERT_PPCF128_F128
11454 : RTLIB::CONVERT_F128_PPCF128;
11455 MakeLibCallOptions CallOptions;
11456 std::pair<SDValue, SDValue> Result =
11457 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11458 dl, SDValue());
11459 return Result.first;
11460 }
11461 case Intrinsic::ppc_maxfe:
11462 case Intrinsic::ppc_maxfl:
11463 case Intrinsic::ppc_maxfs:
11464 case Intrinsic::ppc_minfe:
11465 case Intrinsic::ppc_minfl:
11466 case Intrinsic::ppc_minfs: {
11467 EVT VT = Op.getValueType();
11468 assert(
11469 all_of(Op->ops().drop_front(4),
11470 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11471 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11472 (void)VT;
11473 ISD::CondCode CC = ISD::SETGT;
11474 if (IntrinsicID == Intrinsic::ppc_minfe ||
11475 IntrinsicID == Intrinsic::ppc_minfl ||
11476 IntrinsicID == Intrinsic::ppc_minfs)
11477 CC = ISD::SETLT;
11478 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11479 SDValue Res = Op.getOperand(I);
11480 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11481 Res =
11482 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11483 }
11484 return Res;
11485 }
11486 }
11487
11488 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11489 // opcode number of the comparison.
11490 int CompareOpc;
11491 bool isDot;
11492 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11493 return SDValue(); // Don't custom lower most intrinsics.
11494
11495 // If this is a non-dot comparison, make the VCMP node and we are done.
11496 if (!isDot) {
11497 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11498 Op.getOperand(1), Op.getOperand(2),
11499 DAG.getConstant(CompareOpc, dl, MVT::i32));
11500 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11501 }
11502
11503 // Create the PPCISD altivec 'dot' comparison node.
11504 SDValue Ops[] = {
11505 Op.getOperand(2), // LHS
11506 Op.getOperand(3), // RHS
11507 DAG.getConstant(CompareOpc, dl, MVT::i32)
11508 };
11509 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11510 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11511
11512 // Unpack the result based on how the target uses it.
11513 unsigned BitNo; // Bit # of CR6.
11514 bool InvertBit; // Invert result?
11515 unsigned Bitx;
11516 unsigned SetOp;
11517 switch (Op.getConstantOperandVal(1)) {
11518 default: // Can't happen, don't crash on invalid number though.
11519 case 0: // Return the value of the EQ bit of CR6.
11520 BitNo = 0;
11521 InvertBit = false;
11522 Bitx = PPC::sub_eq;
11523 SetOp = PPCISD::SETBC;
11524 break;
11525 case 1: // Return the inverted value of the EQ bit of CR6.
11526 BitNo = 0;
11527 InvertBit = true;
11528 Bitx = PPC::sub_eq;
11529 SetOp = PPCISD::SETBCR;
11530 break;
11531 case 2: // Return the value of the LT bit of CR6.
11532 BitNo = 2;
11533 InvertBit = false;
11534 Bitx = PPC::sub_lt;
11535 SetOp = PPCISD::SETBC;
11536 break;
11537 case 3: // Return the inverted value of the LT bit of CR6.
11538 BitNo = 2;
11539 InvertBit = true;
11540 Bitx = PPC::sub_lt;
11541 SetOp = PPCISD::SETBCR;
11542 break;
11543 }
11544
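// On ISA 3.1 the requested CR6 bit can be converted to an integer directly
// with setbc/setbcr; older subtargets fall back to mfocrf plus a
// shift-and-mask sequence below.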
11545 SDValue GlueOp = CompNode.getValue(1);
11546 if (Subtarget.isISA3_1()) {
11547 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11548 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11549 SDValue CRBit =
11550 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11551 CR6Reg, SubRegIdx, GlueOp),
11552 0);
11553 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11554 }
11555
11556 // Now that we have the comparison, emit a copy from the CR to a GPR.
11557 // This is flagged to the above dot comparison.
11558 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11559 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11560
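// MFOCRF leaves CR6 in the low byte of the GPR (LT, GT, EQ, SO at bit
// positions 7, 6, 5, 4 counting from the LSB), so shifting right by
// 8 - (3 - BitNo) moves the requested bit into bit 0.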
11561 // Shift the bit into the low position.
11562 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11563 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11564 // Isolate the bit.
11565 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11566 DAG.getConstant(1, dl, MVT::i32));
11567
11568 // If we are supposed to, toggle the bit.
11569 if (InvertBit)
11570 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11571 DAG.getConstant(1, dl, MVT::i32));
11572 return Flags;
11573}
11574
11575SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11576 SelectionDAG &DAG) const {
11577 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11578 // the beginning of the argument list.
11579 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11580 SDLoc DL(Op);
11581 switch (Op.getConstantOperandVal(ArgStart)) {
11582 case Intrinsic::ppc_cfence: {
11583 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11584 SDValue Val = Op.getOperand(ArgStart + 1);
11585 EVT Ty = Val.getValueType();
11586 if (Ty == MVT::i128) {
11587 // FIXME: Testing one of two paired registers is sufficient to guarantee
11588 // ordering?
11589 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11590 }
11591 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11592 return SDValue(
11593 DAG.getMachineNode(
11594 Opcode, DL, MVT::Other,
11595 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11596 Op.getOperand(0)),
11597 0);
11598 }
11599 case Intrinsic::ppc_disassemble_dmr: {
11600 assert(ArgStart == 1 &&
11601 "llvm.ppc.disassemble.dmr must carry a chain argument.");
11602 return DAG.getStore(Op.getOperand(0), DL, Op.getOperand(ArgStart + 2),
11603 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11604 }
11605 case Intrinsic::ppc_amo_stwat:
11606 case Intrinsic::ppc_amo_stdat: {
11607 SDLoc dl(Op);
11608 SDValue Chain = Op.getOperand(0);
11609 SDValue Ptr = Op.getOperand(ArgStart + 1);
11610 SDValue Val = Op.getOperand(ArgStart + 2);
11611 SDValue FC = Op.getOperand(ArgStart + 3);
11612
11613 return DAG.getNode(PPCISD::STAT, dl, MVT::Other, Chain, Val, Ptr, FC);
11614 }
11615 default:
11616 break;
11617 }
11618 return SDValue();
11619}
11620
11621// Lower scalar BSWAP64 to xxbrd.
11622SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11623 SDLoc dl(Op);
11624 if (!Subtarget.isPPC64())
11625 return Op;
11626 // MTVSRDD
11627 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11628 Op.getOperand(0));
11629 // XXBRD
11630 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11631 // MFVSRD
11632 int VectorIndex = 0;
11633 if (Subtarget.isLittleEndian())
11634 VectorIndex = 1;
11635 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11636 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11637 return Op;
11638}
11639
11640// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11641// compared to a value that is atomically loaded (atomic loads zero-extend).
11642SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11643 SelectionDAG &DAG) const {
11644 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11645 "Expecting an atomic compare-and-swap here.");
11646 SDLoc dl(Op);
11647 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11648 EVT MemVT = AtomicNode->getMemoryVT();
11649 if (MemVT.getSizeInBits() >= 32)
11650 return Op;
11651
11652 SDValue CmpOp = Op.getOperand(2);
11653 // If this is already correctly zero-extended, leave it alone.
11654 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11655 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11656 return Op;
11657
11658 // Clear the high bits of the compare operand.
11659 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11660 SDValue NewCmpOp =
11661 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11662 DAG.getConstant(MaskVal, dl, MVT::i32));
11663
11664 // Replace the existing compare operand with the properly zero-extended one.
11665 SmallVector<SDValue, 4> Ops;
11666 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11667 Ops.push_back(AtomicNode->getOperand(i));
11668 Ops[2] = NewCmpOp;
11669 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11670 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11671 auto NodeTy =
11672 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11673 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11674}
11675
11676SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11677 SelectionDAG &DAG) const {
11678 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11679 EVT MemVT = N->getMemoryVT();
11680 assert(MemVT.getSimpleVT() == MVT::i128 &&
11681 "Expect quadword atomic operations");
11682 SDLoc dl(N);
11683 unsigned Opc = N->getOpcode();
11684 switch (Opc) {
11685 case ISD::ATOMIC_LOAD: {
11686 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11687 // lowered to ppc instructions by pattern matching instruction selector.
11688 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11689 SmallVector<SDValue, 4> Ops{
11690 N->getOperand(0),
11691 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11692 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11693 Ops.push_back(N->getOperand(I));
11694 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11695 Ops, MemVT, N->getMemOperand());
11696 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11697 SDValue ValHi =
11698 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11699 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11700 DAG.getConstant(64, dl, MVT::i32));
11701 SDValue Val =
11702 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11703 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11704 {Val, LoadedVal.getValue(2)});
11705 }
11706 case ISD::ATOMIC_STORE: {
11707 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11708 // lowered to ppc instructions by pattern matching instruction selector.
11709 SDVTList Tys = DAG.getVTList(MVT::Other);
11710 SmallVector<SDValue, 4> Ops{
11711 N->getOperand(0),
11712 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11713 SDValue Val = N->getOperand(1);
11714 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11715 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11716 DAG.getConstant(64, dl, MVT::i32));
11717 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11718 Ops.push_back(ValLo);
11719 Ops.push_back(ValHi);
11720 Ops.push_back(N->getOperand(2));
11721 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11722 N->getMemOperand());
11723 }
11724 default:
11725 llvm_unreachable("Unexpected atomic opcode");
11726 }
11727}
11728
11729 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11730 SelectionDAG &DAG,
11731 const PPCSubtarget &Subtarget) {
11732 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11733
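// Bit assignments of the DCMX mask operand of the XSTSTDC[SDQ]P
// test-data-class instructions, one bit per IEEE data class.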
11734 enum DataClassMask {
11735 DC_NAN = 1 << 6,
11736 DC_NEG_INF = 1 << 4,
11737 DC_POS_INF = 1 << 5,
11738 DC_NEG_ZERO = 1 << 2,
11739 DC_POS_ZERO = 1 << 3,
11740 DC_NEG_SUBNORM = 1,
11741 DC_POS_SUBNORM = 1 << 1,
11742 };
11743
11744 EVT VT = Op.getValueType();
11745
11746 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11747 : VT == MVT::f64 ? PPC::XSTSTDCDP
11748 : PPC::XSTSTDCSP;
11749
11750 if (Mask == fcAllFlags)
11751 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11752 if (Mask == 0)
11753 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11754
11755 // When it is cheaper or necessary, test the complement of the classes and
11756 // invert the result (e.g. an is-finite query becomes a negated NaN/infinity
11757 // test).
11756 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11757 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11758 return DAG.getNOT(Dl, Rev, MVT::i1);
11759 }
11760
11761 // Power doesn't support testing whether a value is 'normal'. Test the rest
11762 // first, and test if it's 'not not-normal' with expected sign.
11763 if (Mask & fcNormal) {
11764 SDValue Rev(DAG.getMachineNode(
11765 TestOp, Dl, MVT::i32,
11766 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11767 DC_NEG_ZERO | DC_POS_ZERO |
11768 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11769 Dl, MVT::i32),
11770 Op),
11771 0);
11772 // The sign is stored in CR bit 0, the result in CR bit 2.
11773 SDValue Sign(
11774 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11775 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11776 0);
11777 SDValue Normal(DAG.getNOT(
11778 Dl,
11779 SDValue(DAG.getMachineNode(
11780 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11781 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11782 0),
11783 MVT::i1));
11784 if (Mask & fcPosNormal)
11785 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11786 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11787 if (Mask == fcPosNormal || Mask == fcNegNormal)
11788 return Result;
11789
11790 return DAG.getNode(
11791 ISD::OR, Dl, MVT::i1,
11792 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11793 }
11794
11795 // The instruction doesn't differentiate between signaling or quiet NaN. Test
11796 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11797 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11798 bool IsQuiet = Mask & fcQNan;
11799 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11800
11801 // Quietness is determined by the first bit of the fraction field.
11802 uint64_t QuietMask = 0;
11803 SDValue HighWord;
11804 if (VT == MVT::f128) {
11805 HighWord = DAG.getNode(
11806 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11807 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11808 QuietMask = 0x8000;
11809 } else if (VT == MVT::f64) {
11810 if (Subtarget.isPPC64()) {
11811 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11812 DAG.getBitcast(MVT::i64, Op),
11813 DAG.getConstant(1, Dl, MVT::i32));
11814 } else {
11815 SDValue Vec = DAG.getBitcast(
11816 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11817 HighWord = DAG.getNode(
11818 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11819 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11820 }
11821 QuietMask = 0x80000;
11822 } else if (VT == MVT::f32) {
11823 HighWord = DAG.getBitcast(MVT::i32, Op);
11824 QuietMask = 0x400000;
11825 }
11826 SDValue NanRes = DAG.getSetCC(
11827 Dl, MVT::i1,
11828 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11829 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11830 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11831 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11832 if (Mask == fcQNan || Mask == fcSNan)
11833 return NanRes;
11834
11835 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11836 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11837 NanRes);
11838 }
11839
11840 unsigned NativeMask = 0;
11841 if ((Mask & fcNan) == fcNan)
11842 NativeMask |= DC_NAN;
11843 if (Mask & fcNegInf)
11844 NativeMask |= DC_NEG_INF;
11845 if (Mask & fcPosInf)
11846 NativeMask |= DC_POS_INF;
11847 if (Mask & fcNegZero)
11848 NativeMask |= DC_NEG_ZERO;
11849 if (Mask & fcPosZero)
11850 NativeMask |= DC_POS_ZERO;
11851 if (Mask & fcNegSubnormal)
11852 NativeMask |= DC_NEG_SUBNORM;
11853 if (Mask & fcPosSubnormal)
11854 NativeMask |= DC_POS_SUBNORM;
11855 return SDValue(
11856 DAG.getMachineNode(
11857 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11858 SDValue(DAG.getMachineNode(
11859 TestOp, Dl, MVT::i32,
11860 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11861 0),
11862 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11863 0);
11864}
11865
11866SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11867 SelectionDAG &DAG) const {
11868 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11869 SDValue LHS = Op.getOperand(0);
11870 uint64_t RHSC = Op.getConstantOperandVal(1);
11871 SDLoc Dl(Op);
11872 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11873 if (LHS.getValueType() == MVT::ppcf128) {
11874 // The higher part determines the value class.
11875 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11876 DAG.getConstant(1, Dl, MVT::i32));
11877 }
11878
11879 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11880}
11881
11882// Adjust the length value for a load/store with length to account for the
11883// instructions requiring a left justified length, and for non-byte element
11884// types requiring scaling by element size.
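// For example (illustrative), a v8i16 access with a 64-bit length value has
// TypeAdj = 1 (element count scaled to bytes); when left justification is
// also required, LeftAdj = 56, leaving the byte count in the top byte of the
// register as lxvl/stxvl expect.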
11885static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11886 SelectionDAG &DAG) {
11887 SDLoc dl(Val);
11888 EVT VT = Val->getValueType(0);
11889 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11890 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
11891 SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
11892 return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
11893}
11894
11895SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11896 auto VPLD = cast<VPLoadSDNode>(Op);
11897 bool Future = Subtarget.isISAFuture();
11898 SDLoc dl(Op);
11899 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
11900 "Mask predication not supported");
11901 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11902 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
11903 unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
11904 unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
11905 Len = AdjustLength(Len, EltBits, !Future, DAG);
11906 SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11907 VPLD->getOperand(1), Len};
11908 SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
11909 SDValue VPL =
11910 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys, Ops,
11911 VPLD->getMemoryVT(), VPLD->getMemOperand());
11912 return VPL;
11913}
11914
11915SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
11916 auto VPST = cast<VPStoreSDNode>(Op);
11917 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
11918 "Mask predication not supported");
11919 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11920 SDLoc dl(Op);
11921 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
11922 unsigned EltBits =
11923 Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
11924 bool Future = Subtarget.isISAFuture();
11925 unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
11926 Len = AdjustLength(Len, EltBits, !Future, DAG);
11927 SDValue Ops[] = {
11928 VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11929 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
11930 VPST->getOperand(2), Len};
11931 SDVTList Tys = DAG.getVTList(MVT::Other);
11932 SDValue VPS =
11933 DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
11934 VPST->getMemoryVT(), VPST->getMemOperand());
11935 return VPS;
11936}
11937
11938SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11939 SelectionDAG &DAG) const {
11940 SDLoc dl(Op);
11941
11942 MachineFunction &MF = DAG.getMachineFunction();
11943 SDValue Op0 = Op.getOperand(0);
11944 EVT ValVT = Op0.getValueType();
11945 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11946 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11947 int64_t IntVal = Op.getConstantOperandVal(0);
11948 if (IntVal >= -16 && IntVal <= 15)
11949 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11950 dl);
11951 }
11952
11953 ReuseLoadInfo RLI;
11954 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11955 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11956 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11957 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11958
11959 MachineMemOperand *MMO =
11960 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11961 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11962 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11963 SDValue Bits = DAG.getMemIntrinsicNode(
11964 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11965 MVT::i32, MMO);
11966 if (RLI.ResChain)
11967 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11968 return Bits.getValue(0);
11969 }
11970
11971 // Create a stack slot that is 16-byte aligned.
11972 MachineFrameInfo &MFI = MF.getFrameInfo();
11973 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11974 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11975 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11976
11977 SDValue Val = Op0;
11978 // P10 hardware store forwarding requires that a single store contains all
11979 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11980 // to avoid load hit store on P10 when running binaries compiled for older
11981 // processors by generating two mergeable scalar stores to forward with the
11982 // vector load.
11983 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11984 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11985 ValVT.getSizeInBits() <= 64) {
11986 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11987 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11988 SDValue ShiftBy = DAG.getConstant(
11989 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11990 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11991 SDValue Plus8 =
11992 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11993 SDValue Store2 =
11994 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11995 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11996 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11997 MachinePointerInfo());
11998 }
11999
12000 // Store the input value into Value#0 of the stack slot.
12001 SDValue Store =
12002 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
12003 // Load it out.
12004 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
12005}
12006
12007SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
12008 SelectionDAG &DAG) const {
12009 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
12010 "Should only be called for ISD::INSERT_VECTOR_ELT");
12011
12012 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
12013
12014 EVT VT = Op.getValueType();
12015 SDLoc dl(Op);
12016 SDValue V1 = Op.getOperand(0);
12017 SDValue V2 = Op.getOperand(1);
12018
12019 if (VT == MVT::v2f64 && C)
12020 return Op;
12021
12022 if (Subtarget.hasP9Vector()) {
12023 // An f32 load feeding into a v4f32 insert_vector_elt is handled this way
12024 // because on P10, it allows this specific insert_vector_elt load pattern to
12025 // utilize the refactored load and store infrastructure in order to exploit
12026 // prefixed loads.
12027 // On targets with inexpensive direct moves (Power9 and up), a
12028 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
12029 // load since a single precision load will involve conversion to double
12030 // precision on the load followed by another conversion to single precision.
12031 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
12032 (isa<LoadSDNode>(V2))) {
12033 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
12034 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
12035 SDValue InsVecElt =
12036 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
12037 BitcastLoad, Op.getOperand(2));
12038 return DAG.getBitcast(MVT::v4f32, InsVecElt);
12039 }
12040 }
12041
12042 if (Subtarget.isISA3_1()) {
12043 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
12044 return SDValue();
12045 // On P10, we have legal lowering for constant and variable indices for
12046 // all vectors.
12047 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12048 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
12049 return Op;
12050 }
12051
12052 // Before P10, we have legal lowering for constant indices but not for
12053 // variable ones.
12054 if (!C)
12055 return SDValue();
12056
12057 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
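// For example, inserting element 3 of a v8i16 on little-endian: the element
// begins at byte 6, which becomes byte (16 - 2) - 6 = 8 in the big-endian
// byte numbering VECINSERT uses.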
12058 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
12059 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
12060 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
12061 unsigned InsertAtElement = C->getZExtValue();
12062 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
12063 if (Subtarget.isLittleEndian()) {
12064 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
12065 }
12066 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
12067 DAG.getConstant(InsertAtByte, dl, MVT::i32));
12068 }
12069 return Op;
12070}
12071
12072SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12073 SelectionDAG &DAG) const {
12074 SDLoc dl(Op);
12075 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12076 SDValue LoadChain = LN->getChain();
12077 SDValue BasePtr = LN->getBasePtr();
12078 EVT VT = Op.getValueType();
12079 bool IsV1024i1 = VT == MVT::v1024i1;
12080 bool IsV2048i1 = VT == MVT::v2048i1;
12081
12082 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12083 // Dense Math dmr pair registers, respectively.
12084 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12085 (void)IsV2048i1;
12086 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12087 "Dense Math support required.");
12088 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12089
12090 SmallVector<SDValue, 8> Loads;
12091 SmallVector<SDValue, 8> LoadChains;
12092
12093 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
12094 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12095 MachineMemOperand *MMO = LN->getMemOperand();
12096 unsigned NumVecs = VT.getSizeInBits() / 256;
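// A v1024i1 DMR is loaded as four 256-bit halves (a v2048i1 DMR pair as
// eight), each through an lxvp at consecutive 32-byte offsets.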
12097 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12098 MachineMemOperand *NewMMO =
12099 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12100 if (Idx > 0) {
12101 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12102 DAG.getConstant(32, dl, BasePtr.getValueType()));
12103 LoadOps[2] = BasePtr;
12104 }
12105 SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12106 DAG.getVTList(MVT::v256i1, MVT::Other),
12107 LoadOps, MVT::v256i1, NewMMO);
12108 LoadChains.push_back(Ld.getValue(1));
12109 Loads.push_back(Ld);
12110 }
12111
12112 if (Subtarget.isLittleEndian()) {
12113 std::reverse(Loads.begin(), Loads.end());
12114 std::reverse(LoadChains.begin(), LoadChains.end());
12115 }
12116
12117 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12118 SDValue Value = DMFInsert1024(Loads, dl, DAG);
12119
12120 if (IsV1024i1) {
12121 return DAG.getMergeValues({Value, TF}, dl);
12122 }
12123
12124 // Handle Loads for V2048i1 which represents a dmr pair.
12125 SmallVector<SDValue, 4> MoreLoads{Loads[4], Loads[5], Loads[6], Loads[7]};
12126 SDValue Dmr1Value = DMFInsert1024(MoreLoads, dl, DAG);
12127
12128 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12129 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12130
12131 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12132 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12133
12134 SDValue DmrPValue = SDValue(
12135 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12136
12137 return DAG.getMergeValues({DmrPValue, TF}, dl);
12138}
12139
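// Combine four v256i1 pairs into a single 1024-bit DMR: the first two pairs
// form the low 512-bit half, the last two the high half, and a REG_SEQUENCE
// ties the halves together in the DMR register class.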
12140SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12141 const SDLoc &dl,
12142 SelectionDAG &DAG) const {
12143 SDValue Lo =
12144 DAG.getNode(PPCISD::INST512, dl, MVT::v512i1, Pairs[0], Pairs[1]);
12145 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12146 SDValue Hi =
12147 DAG.getNode(PPCISD::INST512HI, dl, MVT::v512i1, Pairs[2], Pairs[3]);
12148 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12149 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12150
12151 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12152 {RC, Lo, LoSub, Hi, HiSub}),
12153 0);
12154}
12155
12156SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12157 SelectionDAG &DAG) const {
12158 SDLoc dl(Op);
12159 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12160 SDValue LoadChain = LN->getChain();
12161 SDValue BasePtr = LN->getBasePtr();
12162 EVT VT = Op.getValueType();
12163
12164 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12165 return LowerDMFVectorLoad(Op, DAG);
12166
12167 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12168 return Op;
12169
12170 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12171 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12172 "Type unsupported without MMA");
12173 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12174 "Type unsupported without paired vector support");
12175
12176 // For v256i1 on ISA Future, let the load go through to instruction selection
12177 // where it will be matched to lxvp/plxvp by the instruction patterns.
12178 if (VT == MVT::v256i1 && Subtarget.isISAFuture())
12179 return Op;
12180
12181 // For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
12182 // value in 2 or 4 vsx registers.
12183 Align Alignment = LN->getAlign();
12184 SmallVector<SDValue, 4> Loads;
12185 SmallVector<SDValue, 4> LoadChains;
12186 unsigned NumVecs = VT.getSizeInBits() / 128;
12187 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12188 SDValue Load =
12189 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12190 LN->getPointerInfo().getWithOffset(Idx * 16),
12191 commonAlignment(Alignment, Idx * 16),
12192 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12193 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12194 DAG.getConstant(16, dl, BasePtr.getValueType()));
12195 Loads.push_back(Load);
12196 LoadChains.push_back(Load.getValue(1));
12197 }
12198 if (Subtarget.isLittleEndian()) {
12199 std::reverse(Loads.begin(), Loads.end());
12200 std::reverse(LoadChains.begin(), LoadChains.end());
12201 }
12202 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12203 SDValue Value =
12204 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12205 dl, VT, Loads);
12206 SDValue RetOps[] = {Value, TF};
12207 return DAG.getMergeValues(RetOps, dl);
12208}
12209
12210SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12211 SelectionDAG &DAG) const {
12212
12213 SDLoc dl(Op);
12214 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12215 SDValue StoreChain = SN->getChain();
12216 SDValue BasePtr = SN->getBasePtr();
12217 SmallVector<SDValue, 4> Values;
12218 SmallVector<SDValue, 4> Stores;
12219 EVT VT = SN->getValue().getValueType();
12220 bool IsV1024i1 = VT == MVT::v1024i1;
12221 bool IsV2048i1 = VT == MVT::v2048i1;
12222
12223 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12224 // Dense Math dmr pair registers, respectively.
12225 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12226 (void)IsV2048i1;
12227 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12228 "Dense Math support required.");
12229 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12230
12231 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12232 if (IsV1024i1) {
12233 SDValue Lo(DAG.getMachineNode(
12234 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12235 Op.getOperand(1),
12236 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12237 0);
12238 SDValue Hi(DAG.getMachineNode(
12239 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12240 Op.getOperand(1),
12241 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12242 0);
12243 MachineSDNode *ExtNode =
12244 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12245 Values.push_back(SDValue(ExtNode, 0));
12246 Values.push_back(SDValue(ExtNode, 1));
12247 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12248 Values.push_back(SDValue(ExtNode, 0));
12249 Values.push_back(SDValue(ExtNode, 1));
12250 } else {
12251 // This corresponds to v2048i1 which represents a dmr pair.
12252 SDValue Dmr0(
12253 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12254 Op.getOperand(1),
12255 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12256 0);
12257
12258 SDValue Dmr1(
12259 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12260 Op.getOperand(1),
12261 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12262 0);
12263
12264 SDValue Dmr0Lo(DAG.getMachineNode(
12265 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12266 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12267 0);
12268
12269 SDValue Dmr0Hi(DAG.getMachineNode(
12270 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12271 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12272 0);
12273
12274 SDValue Dmr1Lo(DAG.getMachineNode(
12275 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12276 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12277 0);
12278
12279 SDValue Dmr1Hi(DAG.getMachineNode(
12280 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12281 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12282 0);
12283
12284 MachineSDNode *ExtNode =
12285 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12286 Values.push_back(SDValue(ExtNode, 0));
12287 Values.push_back(SDValue(ExtNode, 1));
12288 ExtNode =
12289 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12290 Values.push_back(SDValue(ExtNode, 0));
12291 Values.push_back(SDValue(ExtNode, 1));
12292 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12293 Values.push_back(SDValue(ExtNode, 0));
12294 Values.push_back(SDValue(ExtNode, 1));
12295 ExtNode =
12296 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12297 Values.push_back(SDValue(ExtNode, 0));
12298 Values.push_back(SDValue(ExtNode, 1));
12299 }
12300
12301 if (Subtarget.isLittleEndian())
12302 std::reverse(Values.begin(), Values.end());
12303
12304 SDVTList Tys = DAG.getVTList(MVT::Other);
12305 SDValue Ops[] = {
12306 StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12307 Values[0], BasePtr};
12308 MachineMemOperand *MMO = SN->getMemOperand();
12309 unsigned NumVecs = VT.getSizeInBits() / 256;
12310 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12311 MachineMemOperand *NewMMO =
12312 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12313 if (Idx > 0) {
12314 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12315 DAG.getConstant(32, dl, BasePtr.getValueType()));
12316 Ops[3] = BasePtr;
12317 }
12318 Ops[2] = Values[Idx];
12319 SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
12320 MVT::v256i1, NewMMO);
12321 Stores.push_back(St);
12322 }
12323
12324 SDValue TF = DAG.getTokenFactor(dl, Stores);
12325 return TF;
12326}
12327
12328SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12329 SelectionDAG &DAG) const {
12330 SDLoc dl(Op);
12331 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12332 SDValue StoreChain = SN->getChain();
12333 SDValue BasePtr = SN->getBasePtr();
12334 SDValue Value = SN->getValue();
12335 SDValue Value2 = SN->getValue();
12336 EVT StoreVT = Value.getValueType();
12337
12338 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12339 return LowerDMFVectorStore(Op, DAG);
12340
12341 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12342 return Op;
12343
12344 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12345 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12346 "Type unsupported without MMA");
12347 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12348 "Type unsupported without paired vector support");
12349
12350 // For v256i1 on ISA Future, let the store go through to instruction selection
12351 // where it will be matched to stxvp/pstxvp by the instruction patterns.
12352 if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
12353 Subtarget.pairedVectorMemops())
12354 return Op;
12355
12356 // For other cases, create 2 or 4 v16i8 stores to store the pair or
12357 // accumulator underlying registers individually.
12358 Align Alignment = SN->getAlign();
12359 SmallVector<SDValue, 4> Stores;
12360 unsigned NumVecs = 2;
12361 if (StoreVT == MVT::v512i1) {
12362 if (Subtarget.isISAFuture()) {
12363 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12364 MachineSDNode *ExtNode = DAG.getMachineNode(
12365 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
12366
12367 Value = SDValue(ExtNode, 0);
12368 Value2 = SDValue(ExtNode, 1);
12369 } else
12370 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
12371 NumVecs = 4;
12372 }
12373 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12374 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12375 SDValue Elt;
12376 if (Subtarget.isISAFuture()) {
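// On ISA Future the accumulator was split into two v256i1 halves above
// (Value and Value2); Idx selects the half and Idx % 2 the register within
// it, with the in-pair order reversed on little-endian.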
12377 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12378 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
12379 Idx > 1 ? Value2 : Value,
12380 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12381 } else
12382 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
12383 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12384
12385 SDValue Store =
12386 DAG.getStore(StoreChain, dl, Elt, BasePtr,
12387 SN->getPointerInfo().getWithOffset(Idx * 16),
12388 commonAlignment(Alignment, Idx * 16),
12389 SN->getMemOperand()->getFlags(), SN->getAAInfo());
12390 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12391 DAG.getConstant(16, dl, BasePtr.getValueType()));
12392 Stores.push_back(Store);
12393 }
12394 SDValue TF = DAG.getTokenFactor(dl, Stores);
12395 return TF;
12396}
12397
12398SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12399 SDLoc dl(Op);
12400 if (Op.getValueType() == MVT::v4i32) {
12401 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12402
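// Split each 32-bit lane into halves: with LHS = ah:al and RHS = bh:bl, the
// low product al*bl comes from vmulouh, the cross terms ah*bl + al*bh come
// from vmsumuhm against the halfword-swapped RHS, and shifting the cross
// terms left by 16 before adding reproduces the product modulo 2^32.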
12403 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12404 // +16 as shift amt.
12405 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12406 SDValue RHSSwap = // = vrlw RHS, 16
12407 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12408
12409 // Shrinkify inputs to v8i16.
12410 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12411 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12412 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12413
12414 // Low parts multiplied together, generating 32-bit results (we ignore the
12415 // top parts).
12416 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12417 LHS, RHS, DAG, dl, MVT::v4i32);
12418
12419 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12420 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12421 // Shift the high parts up 16 bits.
12422 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12423 Neg16, DAG, dl);
12424 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12425 } else if (Op.getValueType() == MVT::v16i8) {
12426 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12427 bool isLittleEndian = Subtarget.isLittleEndian();
12428
12429 // Multiply the even 8-bit parts, producing 16-bit sums.
12430 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12431 LHS, RHS, DAG, dl, MVT::v8i16);
12432 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12433
12434 // Multiply the odd 8-bit parts, producing 16-bit sums.
12435 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12436 LHS, RHS, DAG, dl, MVT::v8i16);
12437 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12438
12439 // Merge the results together. Because vmuleub and vmuloub are
12440 // instructions with a big-endian bias, we must reverse the
12441 // element numbering and reverse the meaning of "odd" and "even"
12442 // when generating little endian code.
12443 int Ops[16];
12444 for (unsigned i = 0; i != 8; ++i) {
12445 if (isLittleEndian) {
12446 Ops[i*2 ] = 2*i;
12447 Ops[i*2+1] = 2*i+16;
12448 } else {
12449 Ops[i*2 ] = 2*i+1;
12450 Ops[i*2+1] = 2*i+1+16;
12451 }
12452 }
12453 if (isLittleEndian)
12454 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12455 else
12456 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12457 } else {
12458 llvm_unreachable("Unknown mul to lower!");
12459 }
12460}
12461
12462SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12463 bool IsStrict = Op->isStrictFPOpcode();
12464 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12465 !Subtarget.hasP9Vector())
12466 return SDValue();
12467
12468 return Op;
12469}
12470
12471 // Custom lowering for fpext v2f32 to v2f64
12472SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12473
12474 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12475 "Should only be called for ISD::FP_EXTEND");
12476
12477 // FIXME: handle extends from half precision float vectors on P9.
12478 // We only want to custom lower an extend from v2f32 to v2f64.
12479 if (Op.getValueType() != MVT::v2f64 ||
12480 Op.getOperand(0).getValueType() != MVT::v2f32)
12481 return SDValue();
12482
12483 SDLoc dl(Op);
12484 SDValue Op0 = Op.getOperand(0);
12485
12486 switch (Op0.getOpcode()) {
12487 default:
12488 return SDValue();
12489 case ISD::EXTRACT_SUBVECTOR: {
12490 assert(Op0.getNumOperands() == 2 &&
12491 isa<ConstantSDNode>(Op0->getOperand(1)) &&
12492 "Node should have 2 operands with second one being a constant!");
12493
12494 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
12495 return SDValue();
12496
12497 // Custom lower is only done for high or low doubleword.
12498 int Idx = Op0.getConstantOperandVal(1);
12499 if (Idx % 2 != 0)
12500 return SDValue();
12501
12502 // Since input is v4f32, at this point Idx is either 0 or 2.
12503 // Shift to get the doubleword position we want.
12504 int DWord = Idx >> 1;
12505
12506 // High and low word positions are different on little endian.
12507 if (Subtarget.isLittleEndian())
12508 DWord ^= 0x1;
12509
12510 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
12511 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
12512 }
12513 case ISD::FADD:
12514 case ISD::FMUL:
12515 case ISD::FSUB: {
12516 SDValue NewLoad[2];
12517 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12518 // Ensure both inputs are loads.
12519 SDValue LdOp = Op0.getOperand(i);
12520 if (LdOp.getOpcode() != ISD::LOAD)
12521 return SDValue();
12522 // Generate new load node.
12523 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12524 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12525 NewLoad[i] = DAG.getMemIntrinsicNode(
12526 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12527 LD->getMemoryVT(), LD->getMemOperand());
12528 }
12529 SDValue NewOp =
12530 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12531 NewLoad[1], Op0.getNode()->getFlags());
12532 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12533 DAG.getConstant(0, dl, MVT::i32));
12534 }
12535 case ISD::LOAD: {
12536 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12537 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12538 SDValue NewLd = DAG.getMemIntrinsicNode(
12539 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12540 LD->getMemoryVT(), LD->getMemOperand());
12541 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12542 DAG.getConstant(0, dl, MVT::i32));
12543 }
12544 }
12545 llvm_unreachable("ERROR: Should return for all cases within switch.");
12546}
12547
12548 static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12549 SelectionDAG &DAG,
12550 const PPCSubtarget &STI) {
12551 SDLoc DL(Value);
12552 if (STI.useCRBits())
12553 Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
12554 DAG.getConstant(1, DL, SumType),
12555 DAG.getConstant(0, DL, SumType));
12556 else
12557 Value = DAG.getZExtOrTrunc(Value, DL, SumType);
12558 SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
12559 Value, DAG.getAllOnesConstant(DL, SumType));
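// Adding all-ones (i.e. -1) produces a carry-out exactly when Value != 0:
// e.g. for i32, 1 + 0xFFFFFFFF = 0 with CA = 1, while 0 + 0xFFFFFFFF leaves
// CA = 0, so the carry flag holds the boolean value.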
12560 return Sum.getValue(1);
12561}
12562
12563 static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12564 EVT CarryType, SelectionDAG &DAG,
12565 const PPCSubtarget &STI) {
12566 SDLoc DL(Flag);
12567 SDValue Zero = DAG.getConstant(0, DL, SumType);
12568 SDValue Carry = DAG.getNode(
12569 PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
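// ADDE computes 0 + 0 + CA, so the resulting sum is exactly the carry bit.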
12570 if (STI.useCRBits())
12571 return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
12572 return DAG.getZExtOrTrunc(Carry, DL, CarryType);
12573}
12574
12575SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12576
12577 SDLoc DL(Op);
12578 SDNode *N = Op.getNode();
12579 EVT VT = N->getValueType(0);
12580 EVT CarryType = N->getValueType(1);
12581 unsigned Opc = N->getOpcode();
12582 bool IsAdd = Opc == ISD::UADDO;
12583 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12584 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12585 N->getOperand(0), N->getOperand(1));
12586 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12587 DAG, Subtarget);
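// PPC's CA bit is a "no borrow" flag for subtraction, while ISD::USUBO
// expects a borrow bit, so the carry must be inverted for the sub case.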
12588 if (!IsAdd)
12589 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12590 DAG.getConstant(1UL, DL, CarryType));
12591 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12592}
12593
12594SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12595 SelectionDAG &DAG) const {
12596 SDLoc DL(Op);
12597 SDNode *N = Op.getNode();
12598 unsigned Opc = N->getOpcode();
12599 EVT VT = N->getValueType(0);
12600 EVT CarryType = N->getValueType(1);
12601 SDValue CarryOp = N->getOperand(2);
12602 bool IsAdd = Opc == ISD::UADDO_CARRY;
12603 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
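// As in LowerADDSUBO, the borrow conventions differ: the incoming ISD borrow
// is inverted into PPC's CA before the operation, and the outgoing CA is
// inverted back into a borrow afterwards.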
12604 if (!IsAdd)
12605 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12606 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12607 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12608 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12609 Op.getOperand(0), Op.getOperand(1), CarryOp);
12610 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12611 Subtarget);
12612 if (!IsAdd)
12613 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12614 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12615 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12616}
12617
12618SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12619
12620 SDLoc dl(Op);
12621 SDValue LHS = Op.getOperand(0);
12622 SDValue RHS = Op.getOperand(1);
12623 EVT VT = Op.getNode()->getValueType(0);
12624
12625 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12626
12627 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12628 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12629
12630 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
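// The sign bit of (RHS ^ LHS) & (Sub ^ LHS) is set iff the operands have
// different signs and the result's sign differs from LHS's, i.e. signed
// overflow. For example, for i32 INT_MIN - 1: both XORs have the sign bit
// set, so the AND flags the overflow.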
12631
12632 SDValue Overflow =
12633 DAG.getNode(ISD::SRL, dl, VT, And,
12634 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12635
12636 SDValue OverflowTrunc =
12637 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12638
12639 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12640}
12641
12642/// Implements signed add with overflow detection using the rule:
12643 /// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign bit.
12644SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12645
12646 SDLoc dl(Op);
12647 SDValue LHS = Op.getOperand(0);
12648 SDValue RHS = Op.getOperand(1);
12649 EVT VT = Op.getNode()->getValueType(0);
12650
12651 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
12652
12653 // Compute ~(x xor y)
12654 SDValue XorXY = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12655 SDValue EqvXY = DAG.getNOT(dl, XorXY, VT);
12656 // Compute (s xor x)
12657 SDValue SumXorX = DAG.getNode(ISD::XOR, dl, VT, Sum, LHS);
12658
12659 // overflow = (x eqv y) & (s xor x)
12660 SDValue OverflowInSign = DAG.getNode(ISD::AND, dl, VT, EqvXY, SumXorX);
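// For example, for i8 100 + 100: the operands' signs agree (the eqv sign bit
// is 1) and the sum (-56) has the opposite sign (the sum xor x sign bit is
// 1), so the sign bit of the AND indicates overflow.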
12661
12662 // Shift sign bit down to LSB
12663 SDValue Overflow =
12664 DAG.getNode(ISD::SRL, dl, VT, OverflowInSign,
12665 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12666 // Truncate to the overflow type (i1)
12667 SDValue OverflowTrunc =
12668 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12669
12670 return DAG.getMergeValues({Sum, OverflowTrunc}, dl);
12671}
12672
12673// Lower unsigned 3-way compare producing -1/0/1.
12674SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12675 SDLoc DL(Op);
12676 SDValue A = DAG.getFreeze(Op.getOperand(0));
12677 SDValue B = DAG.getFreeze(Op.getOperand(1));
12678 EVT OpVT = A.getValueType();
12679 EVT ResVT = Op.getValueType();
12680
12681 // On PPC64, i32 carries are affected by the upper 32 bits of the registers.
12682 // We must zero-extend to i64 to ensure the carry reflects the 32-bit unsigned
12683 // comparison.
12684 if (Subtarget.isPPC64() && OpVT == MVT::i32) {
12685 A = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, A);
12686 B = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, B);
12687 OpVT = MVT::i64;
12688 }
12689
12690 // First compute diff = A - B.
12691 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12692
12693 // Generate B - A using SUBC to capture carry.
12694 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12695 SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12696 SDValue CA0 = SubC.getValue(1);
12697
12698 // t2 = A - B + CA0 using SUBE.
12699 SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12700 SDValue CA1 = SubE1.getValue(1);
12701
12702 // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12703 SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
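// Worked example (i64): A=5, B=7. Diff = -2; B - A sets CA0 = 1 (no borrow);
// SubE1 = A - B - 1 + CA0 = -2 with CA1 = 0; the final SUBE gives
// Diff - SubE1 - 1 + CA1 = -1. For A == B the result is 0, for A > B it is 1.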
12704
12705 // Extract the first result and truncate to result type if needed.
12706 return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12707}
12708
12709/// LowerOperation - Provide custom lowering hooks for some operations.
12710///
12712 switch (Op.getOpcode()) {
12713 default:
12714 llvm_unreachable("Wasn't expecting to be able to lower this!");
12715 case ISD::FPOW: return lowerPow(Op, DAG);
12716 case ISD::FSIN: return lowerSin(Op, DAG);
12717 case ISD::FCOS: return lowerCos(Op, DAG);
12718 case ISD::FLOG: return lowerLog(Op, DAG);
12719 case ISD::FLOG10: return lowerLog10(Op, DAG);
12720 case ISD::FEXP: return lowerExp(Op, DAG);
12721 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12722 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12723 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12724 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12725 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12726 case ISD::STRICT_FSETCC:
12727 case ISD::STRICT_FSETCCS:
12728 case ISD::SETCC: return LowerSETCC(Op, DAG);
12729 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12730 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12731 case ISD::SSUBO:
12732 return LowerSSUBO(Op, DAG);
12733 case ISD::SADDO:
12734 return LowerSADDO(Op, DAG);
12735
12736 case ISD::INLINEASM:
12737 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12738 // Variable argument lowering.
12739 case ISD::VASTART: return LowerVASTART(Op, DAG);
12740 case ISD::VAARG: return LowerVAARG(Op, DAG);
12741 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12742
12743 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12744 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12745 case ISD::GET_DYNAMIC_AREA_OFFSET:
12746 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12747
12748 // Exception handling lowering.
12749 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12750 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12751 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12752
12753 case ISD::LOAD: return LowerLOAD(Op, DAG);
12754 case ISD::STORE: return LowerSTORE(Op, DAG);
12755 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12756 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12757 case ISD::STRICT_FP_TO_UINT:
12758 case ISD::STRICT_FP_TO_SINT:
12759 case ISD::FP_TO_UINT:
12760 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12761 case ISD::STRICT_UINT_TO_FP:
12762 case ISD::STRICT_SINT_TO_FP:
12763 case ISD::UINT_TO_FP:
12764 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12765 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12766 case ISD::SET_ROUNDING:
12767 return LowerSET_ROUNDING(Op, DAG);
12768
12769 // Lower 64-bit shifts.
12770 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12771 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12772 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12773
12774 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12775 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12776
12777 // Vector-related lowering.
12778 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12779 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12780 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12781 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12782 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12783 case ISD::MUL: return LowerMUL(Op, DAG);
12784 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12785 case ISD::STRICT_FP_ROUND:
12786 case ISD::FP_ROUND:
12787 return LowerFP_ROUND(Op, DAG);
12788 case ISD::ROTL: return LowerROTL(Op, DAG);
12789
12790 // For counter-based loop handling.
12791 case ISD::INTRINSIC_W_CHAIN:
12792 return SDValue();
12793
12794 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12795
12796 // Frame & Return address.
12797 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12798 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12799
12800 case ISD::INTRINSIC_VOID:
12801 return LowerINTRINSIC_VOID(Op, DAG);
12802 case ISD::BSWAP:
12803 return LowerBSWAP(Op, DAG);
12804 case ISD::ATOMIC_CMP_SWAP:
12805 return LowerATOMIC_CMP_SWAP(Op, DAG);
12806 case ISD::ATOMIC_STORE:
12807 return LowerATOMIC_LOAD_STORE(Op, DAG);
12808 case ISD::IS_FPCLASS:
12809 return LowerIS_FPCLASS(Op, DAG);
12810 case ISD::UADDO:
12811 case ISD::USUBO:
12812 return LowerADDSUBO(Op, DAG);
12813 case ISD::UADDO_CARRY:
12814 case ISD::USUBO_CARRY:
12815 return LowerADDSUBO_CARRY(Op, DAG);
12816 case ISD::UCMP:
12817 return LowerUCMP(Op, DAG);
12818 case ISD::STRICT_LRINT:
12819 case ISD::STRICT_LLRINT:
12820 case ISD::STRICT_LROUND:
12821 case ISD::STRICT_LLROUND:
12823 if (Op->getFlags().hasNoFPExcept())
12824 return Op;
12825 return SDValue();
12826 case ISD::VP_LOAD:
12827 return LowerVP_LOAD(Op, DAG);
12828 case ISD::VP_STORE:
12829 return LowerVP_STORE(Op, DAG);
12830 }
12831}
12832
12833 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12834 SmallVectorImpl<SDValue> &Results,
12835 SelectionDAG &DAG) const {
12836 SDLoc dl(N);
12837 switch (N->getOpcode()) {
12838 default:
12839 llvm_unreachable("Do not know how to custom type legalize this operation!");
12840 case ISD::ATOMIC_LOAD: {
12841 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12842 Results.push_back(Res);
12843 Results.push_back(Res.getValue(1));
12844 break;
12845 }
12846 case ISD::READCYCLECOUNTER: {
12847 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12848 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12849
12850 Results.push_back(
12851 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12852 Results.push_back(RTB.getValue(2));
12853 break;
12854 }
12855 case ISD::INTRINSIC_W_CHAIN: {
12856 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12857 break;
12858
12859 assert(N->getValueType(0) == MVT::i1 &&
12860 "Unexpected result type for CTR decrement intrinsic");
12861 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12862 N->getValueType(0));
12863 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12864 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12865 N->getOperand(1));
12866
12867 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12868 Results.push_back(NewInt.getValue(1));
12869 break;
12870 }
12871 case ISD::INTRINSIC_WO_CHAIN: {
12872 switch (N->getConstantOperandVal(0)) {
12873 case Intrinsic::ppc_pack_longdouble:
12874 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12875 N->getOperand(2), N->getOperand(1)));
12876 break;
12877 case Intrinsic::ppc_maxfe:
12878 case Intrinsic::ppc_minfe:
12879 case Intrinsic::ppc_fnmsub:
12880 case Intrinsic::ppc_convert_f128_to_ppcf128:
12881 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12882 break;
12883 }
12884 break;
12885 }
12886 case ISD::VAARG: {
12887 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12888 return;
12889
12890 EVT VT = N->getValueType(0);
12891
12892 if (VT == MVT::i64) {
12893 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12894
12895 Results.push_back(NewNode);
12896 Results.push_back(NewNode.getValue(1));
12897 }
12898 return;
12899 }
12900 case ISD::STRICT_FP_TO_SINT:
12901 case ISD::STRICT_FP_TO_UINT:
12902 case ISD::FP_TO_SINT:
12903 case ISD::FP_TO_UINT: {
12904 // LowerFP_TO_INT() can only handle f32 and f64.
12905 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12906 MVT::ppcf128)
12907 return;
12908 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12909 Results.push_back(LoweredValue);
12910 if (N->isStrictFPOpcode())
12911 Results.push_back(LoweredValue.getValue(1));
12912 return;
12913 }
12914 case ISD::TRUNCATE: {
12915 if (!N->getValueType(0).isVector())
12916 return;
12917 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12918 if (Lowered)
12919 Results.push_back(Lowered);
12920 return;
12921 }
12922 case ISD::SCALAR_TO_VECTOR: {
12923 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12924 if (Lowered)
12925 Results.push_back(Lowered);
12926 return;
12927 }
12928 case ISD::FSHL:
12929 case ISD::FSHR:
12930 // Don't handle funnel shifts here.
12931 return;
12932 case ISD::BITCAST:
12933 // Don't handle bitcast here.
12934 return;
12935 case ISD::FP_EXTEND:
12936 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12937 if (Lowered)
12938 Results.push_back(Lowered);
12939 return;
12940 }
12941}
12942
12943//===----------------------------------------------------------------------===//
12944// Other Lowering Code
12945//===----------------------------------------------------------------------===//
12946
12947 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12948 return Builder.CreateIntrinsic(Id, {});
12949}
12950
12951 Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12952 Value *Addr,
12953 AtomicOrdering Ord) const {
12954 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12955
12956 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12957 "Only 8/16/32/64-bit atomic loads supported");
12958 Intrinsic::ID IntID;
12959 switch (SZ) {
12960 default:
12961 llvm_unreachable("Unexpected PrimitiveSize");
12962 case 8:
12963 IntID = Intrinsic::ppc_lbarx;
12964 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12965 break;
12966 case 16:
12967 IntID = Intrinsic::ppc_lharx;
12968 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12969 break;
12970 case 32:
12971 IntID = Intrinsic::ppc_lwarx;
12972 break;
12973 case 64:
12974 IntID = Intrinsic::ppc_ldarx;
12975 break;
12976 }
12977 Value *Call =
12978 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
12979
12980 return Builder.CreateTruncOrBitCast(Call, ValueTy);
12981}
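// For a 16-bit atomic load, for instance, the IR produced above is roughly
// (register names illustrative):
//   %larx = call i32 @llvm.ppc.lharx(ptr %addr)
//   %val = trunc i32 %larx to i16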
12982
12983// Perform a store-conditional operation to Addr. Return the status of the
12984 // store. This should be 0 if the store succeeded, non-zero otherwise.
12985 Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12986 Value *Val, Value *Addr,
12987 AtomicOrdering Ord) const {
12988 Type *Ty = Val->getType();
12989 unsigned SZ = Ty->getPrimitiveSizeInBits();
12990
12991 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12992 "Only 8/16/32/64-bit atomic loads supported");
12993 Intrinsic::ID IntID;
12994 switch (SZ) {
12995 default:
12996 llvm_unreachable("Unexpected PrimitiveSize");
12997 case 8:
12998 IntID = Intrinsic::ppc_stbcx;
12999 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13000 break;
13001 case 16:
13002 IntID = Intrinsic::ppc_sthcx;
13003 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13004 break;
13005 case 32:
13006 IntID = Intrinsic::ppc_stwcx;
13007 break;
13008 case 64:
13009 IntID = Intrinsic::ppc_stdcx;
13010 break;
13011 }
13012
13013 if (SZ == 8 || SZ == 16)
13014 Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
13015
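// The st[bhwd]cx. intrinsics used here return 1 when the store-conditional
// succeeds; XOR-ing with 1 below converts this to the 0-on-success status
// documented above.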
13016 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
13017 /*FMFSource=*/nullptr, "stcx");
13018 return Builder.CreateXor(Call, Builder.getInt32(1));
13019}
13020
13021 // The mappings for emitLeading/TrailingFence are taken from
13022 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
13023 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
13024 Instruction *Inst,
13025 AtomicOrdering Ord) const {
13026 if (Ord == AtomicOrdering::SequentiallyConsistent)
13027 return callIntrinsic(Builder, Intrinsic::ppc_sync);
13028 if (isReleaseOrStronger(Ord))
13029 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
13030 return nullptr;
13031}
13032
13033 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
13034 Instruction *Inst,
13035 AtomicOrdering Ord) const {
13036 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
13037 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
13038 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
13039 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
13040 if (isa<LoadInst>(Inst))
13041 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
13042 {Inst});
13043 // FIXME: Can use isync for rmw operation.
13044 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
13045 }
13046 return nullptr;
13047}
13048
13049 MachineBasicBlock *
13050 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
13051 unsigned BinOpcode,
13052 unsigned CmpOpcode,
13053 unsigned CmpPred) const {
13054 // BinOpcode != 0: Handles atomic load with binary operator, e.g. NAND.
13055 // CmpOpcode != 0: Handles atomic load with MIN/MAX etc.
13056 // BinOpcode == 0 && CmpOpcode == 0: Handles ATOMIC_SWAP.
13057 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13058 unsigned AtomicSize = MI.getOperand(3).getImm();
13059
13060 auto LoadMnemonic = PPC::LDARX;
13061 auto StoreMnemonic = PPC::STDCX;
13062 switch (AtomicSize) {
13063 default:
13064 llvm_unreachable("Unexpected size of atomic entity");
13065 case 1:
13066 LoadMnemonic = PPC::LBARX;
13067 StoreMnemonic = PPC::STBCX;
13068 assert(Subtarget.hasPartwordAtomics() && "Partword atomics required for sizes < 4");
13069 break;
13070 case 2:
13071 LoadMnemonic = PPC::LHARX;
13072 StoreMnemonic = PPC::STHCX;
13073 assert(Subtarget.hasPartwordAtomics() && "Partword atomics required for sizes < 4");
13074 break;
13075 case 4:
13076 LoadMnemonic = PPC::LWARX;
13077 StoreMnemonic = PPC::STWCX;
13078 break;
13079 case 8:
13080 LoadMnemonic = PPC::LDARX;
13081 StoreMnemonic = PPC::STDCX;
13082 break;
13083 }
13084
13085 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13086 MachineFunction *F = BB->getParent();
13087 MachineFunction::iterator It = ++BB->getIterator();
13088
13089 if (CmpOpcode == PPC::CMPW && (AtomicSize == 1 || AtomicSize == 2))
13090 signExtendOperandIfUnknown(MI, BB, 4, /*IsByte=*/AtomicSize == 1, TII);
13091
13092 Register dest = MI.getOperand(0).getReg();
13093 Register ptrA = MI.getOperand(1).getReg();
13094 Register ptrB = MI.getOperand(2).getReg();
13095 Register incr = MI.getOperand(4).getReg();
13096 DebugLoc dl = MI.getDebugLoc();
13097
13098 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13099 MachineBasicBlock *loop2MBB =
13100 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13101 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13102 F->insert(It, loopMBB);
13103 if (CmpOpcode)
13104 F->insert(It, loop2MBB);
13105 F->insert(It, exitMBB);
13106 exitMBB->splice(exitMBB->begin(), BB,
13107 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13108 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13109
13110 MachineRegisterInfo &RegInfo = F->getRegInfo();
13111 Register TmpReg = (!BinOpcode) ? incr :
13112 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
13113 : &PPC::GPRCRegClass);
13114
13115 // thisMBB:
13116 // ...
13117 // fallthrough --> loopMBB
13118 BB->addSuccessor(loopMBB);
13119
13120 // loopMBB:
13121 // l[wd]arx dest, ptr
13122 // add r0, dest, incr
13123 // st[wd]cx. r0, ptr
13124 // bne- loopMBB
13125 // fallthrough --> exitMBB
13126
13127 // For max/min...
13128 // loopMBB:
13129 // l[wd]arx dest, ptr
13130 // cmpl?[wd] dest, incr
13131 // bgt exitMBB
13132 // loop2MBB:
13133 // st[wd]cx. dest, ptr
13134 // bne- loopMBB
13135 // fallthrough --> exitMBB
13136
13137 BB = loopMBB;
13138 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
13139 .addReg(ptrA).addReg(ptrB);
13140 if (BinOpcode)
13141 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
13142 if (CmpOpcode) {
13143 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13144 // Signed comparisons of byte or halfword values must be sign-extended.
13145 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13146 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13147 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13148 ExtReg).addReg(dest);
13149 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
13150 } else
13151 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
13152
13153 BuildMI(BB, dl, TII->get(PPC::BCC))
13154 .addImm(CmpPred)
13155 .addReg(CrReg)
13156 .addMBB(exitMBB);
13157 BB->addSuccessor(loop2MBB);
13158 BB->addSuccessor(exitMBB);
13159 BB = loop2MBB;
13160 }
13161 BuildMI(BB, dl, TII->get(StoreMnemonic))
13162 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
13163 BuildMI(BB, dl, TII->get(PPC::BCC))
13164 .addImm(PPC::PRED_NE)
13165 .addReg(PPC::CR0)
13166 .addMBB(loopMBB);
13167 BB->addSuccessor(loopMBB);
13168 BB->addSuccessor(exitMBB);
13169
13170 // exitMBB:
13171 // ...
13172 BB = exitMBB;
13173 return BB;
13174}
13175
13176 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
13177 switch(MI.getOpcode()) {
13178 default:
13179 return false;
13180 case PPC::COPY:
13181 return TII->isSignExtended(MI.getOperand(1).getReg(),
13182 &MI.getMF()->getRegInfo());
13183 case PPC::LHA:
13184 case PPC::LHA8:
13185 case PPC::LHAU:
13186 case PPC::LHAU8:
13187 case PPC::LHAUX:
13188 case PPC::LHAUX8:
13189 case PPC::LHAX:
13190 case PPC::LHAX8:
13191 case PPC::LWA:
13192 case PPC::LWAUX:
13193 case PPC::LWAX:
13194 case PPC::LWAX_32:
13195 case PPC::LWA_32:
13196 case PPC::PLHA:
13197 case PPC::PLHA8:
13198 case PPC::PLHA8pc:
13199 case PPC::PLHApc:
13200 case PPC::PLWA:
13201 case PPC::PLWA8:
13202 case PPC::PLWA8pc:
13203 case PPC::PLWApc:
13204 case PPC::EXTSB:
13205 case PPC::EXTSB8:
13206 case PPC::EXTSB8_32_64:
13207 case PPC::EXTSB8_rec:
13208 case PPC::EXTSB_rec:
13209 case PPC::EXTSH:
13210 case PPC::EXTSH8:
13211 case PPC::EXTSH8_32_64:
13212 case PPC::EXTSH8_rec:
13213 case PPC::EXTSH_rec:
13214 case PPC::EXTSW:
13215 case PPC::EXTSWSLI:
13216 case PPC::EXTSWSLI_32_64:
13217 case PPC::EXTSWSLI_32_64_rec:
13218 case PPC::EXTSWSLI_rec:
13219 case PPC::EXTSW_32:
13220 case PPC::EXTSW_32_64:
13221 case PPC::EXTSW_32_64_rec:
13222 case PPC::EXTSW_rec:
13223 case PPC::SRAW:
13224 case PPC::SRAWI:
13225 case PPC::SRAWI_rec:
13226 case PPC::SRAW_rec:
13227 return true;
13228 }
13229 return false;
13230}
13231
13232// Sign extend operand OpIdx if the value is not known to be sign extended.
13233 // Assumes the operand is a register. The flag IsByte controls which instruction
13234 // is used for the sign extension.
13235 static void signExtendOperandIfUnknown(MachineInstr &MI, MachineBasicBlock *BB,
13236 unsigned OpIdx, bool IsByte,
13237 const PPCInstrInfo *TII) {
13238 MachineFunction *F = MI.getMF();
13239 MachineRegisterInfo &RegInfo = F->getRegInfo();
13240 Register Reg = MI.getOperand(OpIdx).getReg();
13241 bool IsSignExtended =
13242 Reg.isVirtual() && isSignExtended(*RegInfo.getVRegDef(Reg), TII);
13243
13244 if (!IsSignExtended) {
13245 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13246 BuildMI(*BB, MI, MI.getDebugLoc(),
13247 TII->get(IsByte ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13248 .addReg(Reg);
13249 MI.getOperand(OpIdx).setReg(ValueReg);
13250 }
13251}
13252
13253 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
13254 MachineInstr &MI, MachineBasicBlock *BB, unsigned BinOpcode,
13255 unsigned CmpOpcode, unsigned CmpPred) const {
13256 // BinOpcode != 0: Handles atomic load with binary operator, e.g. NAND.
13257 // CmpOpcode != 0: Handles atomic load with MIN/MAX etc.
13258 // BinOpcode == 0 && CmpOpcode == 0: Handles ATOMIC_SWAP.
13259 assert(!Subtarget.hasPartwordAtomics() &&
13260 "Assumes that part-word atomics are not available");
13261 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13262
13263 // If this is a signed comparison and the value being compared is not known
13264 // to be sign extended, sign extend it here.
13265 DebugLoc dl = MI.getDebugLoc();
13266 MachineFunction *F = BB->getParent();
13267 MachineRegisterInfo &RegInfo = F->getRegInfo();
13268 const bool is8bit = MI.getOperand(3).getImm() == 1;
13269 if (CmpOpcode == PPC::CMPW)
13270 signExtendOperandIfUnknown(MI, BB, 4, is8bit, TII);
13271 Register incr = MI.getOperand(4).getReg();
13272
13273 // In 64-bit mode we have to use 64-bit addresses, even though the
13274 // lwarx/stwcx. instructions are 32-bit. With the 32-bit atomics we can use
13275 // address registers without caring whether they're 32 or 64, but here we're
13276 // doing actual arithmetic on the addresses.
13277 bool is64bit = Subtarget.isPPC64();
13278 bool isLittleEndian = Subtarget.isLittleEndian();
13279 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13280
13281 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13282 MachineFunction::iterator It = ++BB->getIterator();
13283
13284 Register dest = MI.getOperand(0).getReg();
13285 Register ptrA = MI.getOperand(1).getReg();
13286 Register ptrB = MI.getOperand(2).getReg();
13287
13288 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13289 MachineBasicBlock *loop2MBB =
13290 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13291 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13292 F->insert(It, loopMBB);
13293 if (CmpOpcode)
13294 F->insert(It, loop2MBB);
13295 F->insert(It, exitMBB);
13296 exitMBB->splice(exitMBB->begin(), BB,
13297 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13298 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13299
13300 const TargetRegisterClass *RC =
13301 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13302 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13303
13304 Register PtrReg = RegInfo.createVirtualRegister(RC);
13305 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13306 Register ShiftReg =
13307 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13308 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13309 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13310 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13311 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13312 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13313 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13314 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13315 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13316 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13317 Register Ptr1Reg;
13318 Register TmpReg =
13319 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13320
13321 // thisMBB:
13322 // ...
13323 // fallthrough --> loopMBB
13324 BB->addSuccessor(loopMBB);
13325
13326 // The 4-byte load must be aligned, while a char or short may be
13327 // anywhere in the word. Hence all this nasty bookkeeping code.
13328 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13329 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13330 // xori shift, shift1, 24 [16]
13331 // rlwinm ptr, ptr1, 0, 0, 29
13332 // slw incr2, incr, shift
13333 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13334 // slw mask, mask2, shift
13335 // loopMBB:
13336 // lwarx tmpDest, ptr
13337 // add tmp, tmpDest, incr2
13338 // andc tmp2, tmpDest, mask
13339 // and tmp3, tmp, mask
13340 // or tmp4, tmp3, tmp2
13341 // stwcx. tmp4, ptr
13342 // bne- loopMBB
13343 // fallthrough --> exitMBB
13344 // srw SrwDest, tmpDest, shift
13345 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
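// For example, on big-endian, a byte at offset 2 within its word gives
// shift1 = (ptr & 3) << 3 = 16 and shift = 16 xor 24 = 8, so the byte
// occupies bits <15:8> of the loaded word and the 0xFF mask is shifted there.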
13346 if (ptrA != ZeroReg) {
13347 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13348 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13349 .addReg(ptrA)
13350 .addReg(ptrB);
13351 } else {
13352 Ptr1Reg = ptrB;
13353 }
13354 // We need to use a 32-bit subregister to avoid a register class mismatch in
13355 // 64-bit mode.
13356 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13357 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
13358 .addImm(3)
13359 .addImm(27)
13360 .addImm(is8bit ? 28 : 27);
13361 if (!isLittleEndian)
13362 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13363 .addReg(Shift1Reg)
13364 .addImm(is8bit ? 24 : 16);
13365 if (is64bit)
13366 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13367 .addReg(Ptr1Reg)
13368 .addImm(0)
13369 .addImm(61);
13370 else
13371 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13372 .addReg(Ptr1Reg)
13373 .addImm(0)
13374 .addImm(0)
13375 .addImm(29);
13376 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13377 if (is8bit)
13378 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13379 else {
13380 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13381 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13382 .addReg(Mask3Reg)
13383 .addImm(65535);
13384 }
13385 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13386 .addReg(Mask2Reg)
13387 .addReg(ShiftReg);
13388
13389 BB = loopMBB;
13390 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13391 .addReg(ZeroReg)
13392 .addReg(PtrReg);
13393 if (BinOpcode)
13394 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13395 .addReg(Incr2Reg)
13396 .addReg(TmpDestReg);
13397 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13398 .addReg(TmpDestReg)
13399 .addReg(MaskReg);
13400 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13401 if (CmpOpcode) {
13402 // For unsigned comparisons, we can directly compare the shifted values.
13403 // For signed comparisons we shift and sign extend.
13404 Register SReg = RegInfo.createVirtualRegister(GPRC);
13405 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13406 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13407 .addReg(TmpDestReg)
13408 .addReg(MaskReg);
13409 unsigned ValueReg = SReg;
13410 unsigned CmpReg = Incr2Reg;
13411 if (CmpOpcode == PPC::CMPW) {
13412 ValueReg = RegInfo.createVirtualRegister(GPRC);
13413 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13414 .addReg(SReg)
13415 .addReg(ShiftReg);
13416 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13417 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13418 .addReg(ValueReg);
13419 ValueReg = ValueSReg;
13420 CmpReg = incr;
13421 }
13422 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13423 BuildMI(BB, dl, TII->get(PPC::BCC))
13424 .addImm(CmpPred)
13425 .addReg(CrReg)
13426 .addMBB(exitMBB);
13427 BB->addSuccessor(loop2MBB);
13428 BB->addSuccessor(exitMBB);
13429 BB = loop2MBB;
13430 }
13431 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13432 BuildMI(BB, dl, TII->get(PPC::STWCX))
13433 .addReg(Tmp4Reg)
13434 .addReg(ZeroReg)
13435 .addReg(PtrReg);
13436 BuildMI(BB, dl, TII->get(PPC::BCC))
13437 .addImm(PPC::PRED_NE)
13438 .addReg(PPC::CR0)
13439 .addMBB(loopMBB);
13440 BB->addSuccessor(loopMBB);
13441 BB->addSuccessor(exitMBB);
13442
13443 // exitMBB:
13444 // ...
13445 BB = exitMBB;
13446 // Since the shift amount is not a constant, we need to clear
13447 // the upper bits with a separate RLWINM.
13448 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13449 .addReg(SrwDestReg)
13450 .addImm(0)
13451 .addImm(is8bit ? 24 : 16)
13452 .addImm(31);
13453 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13454 .addReg(TmpDestReg)
13455 .addReg(ShiftReg);
13456 return BB;
13457}
13458
13459 MachineBasicBlock *
13460 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
13461 MachineBasicBlock *MBB) const {
13462 DebugLoc DL = MI.getDebugLoc();
13463 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13464 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13465
13466 MachineFunction *MF = MBB->getParent();
13467 MachineRegisterInfo &MRI = MF->getRegInfo();
13468
13469 const BasicBlock *BB = MBB->getBasicBlock();
13470 MachineFunction::iterator I = ++MBB->getIterator();
13471
13472 Register DstReg = MI.getOperand(0).getReg();
13473 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13474 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13475 Register mainDstReg = MRI.createVirtualRegister(RC);
13476 Register restoreDstReg = MRI.createVirtualRegister(RC);
13477
13478 MVT PVT = getPointerTy(MF->getDataLayout());
13479 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13480 "Invalid Pointer Size!");
13481 // For v = setjmp(buf), we generate
13482 //
13483 // thisMBB:
13484 // SjLjSetup mainMBB
13485 // bl mainMBB
13486 // v_restore = 1
13487 // b sinkMBB
13488 //
13489 // mainMBB:
13490 // buf[LabelOffset] = LR
13491 // v_main = 0
13492 //
13493 // sinkMBB:
13494 // v = phi(main, restore)
13495 //
13496
13497 MachineBasicBlock *thisMBB = MBB;
13498 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13499 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13500 MF->insert(I, mainMBB);
13501 MF->insert(I, sinkMBB);
13502
13503 MachineInstrBuilder MIB;
13504
13505 // Transfer the remainder of BB and its successor edges to sinkMBB.
13506 sinkMBB->splice(sinkMBB->begin(), MBB,
13507 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13508 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
13509
13510 // Note that the structure of the jmp_buf used here is not compatible
13511 // with that used by libc, and is not designed to be. Specifically, it
13512 // stores only those 'reserved' registers that LLVM does not otherwise
13513 // understand how to spill. Also, by convention, by the time this
13514 // intrinsic is called, Clang has already stored the frame address in the
13515 // first slot of the buffer and stack address in the third. Following the
13516 // X86 target code, we'll store the jump address in the second slot. We also
13517 // need to save the TOC pointer (R2) to handle jumps between shared
13518 // libraries, and that will be stored in the fourth slot. The thread
13519 // identifier (R13) is not affected.
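// The resulting buffer layout, in pointer-sized slots, is therefore:
//   slot 0: frame address (stored by Clang)
//   slot 1: jump address (LabelOffset)
//   slot 2: stack address (stored by Clang; reloaded via SPOffset in longjmp)
//   slot 3: TOC pointer (TOCOffset)
//   slot 4: base pointer (BPOffset)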
13520
13521 // thisMBB:
13522 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13523 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13524 const int64_t BPOffset = 4 * PVT.getStoreSize();
13525
13526 // Prepare the IP in a register.
13527 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13528 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13529 Register BufReg = MI.getOperand(1).getReg();
13530
13531 if (Subtarget.is64BitELFABI()) {
13532 setUsesTOCBasePtr(*MBB->getParent());
13533 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13534 .addReg(PPC::X2)
13535 .addImm(TOCOffset)
13536 .addReg(BufReg)
13537 .cloneMemRefs(MI);
13538 }
13539
13540 // Naked functions never have a base pointer, and so we use r1. For all
13541 // other functions, this decision must be delayed until PEI.
13542 unsigned BaseReg;
13543 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13544 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13545 else
13546 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13547
13548 MIB = BuildMI(*thisMBB, MI, DL,
13549 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13550 .addReg(BaseReg)
13551 .addImm(BPOffset)
13552 .addReg(BufReg)
13553 .cloneMemRefs(MI);
13554
13555 // Setup
13556 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13557 MIB.addRegMask(TRI->getNoPreservedMask());
13558
13559 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13560
13561 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13562 .addMBB(mainMBB);
13563 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13564
13565 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13566 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13567
13568 // mainMBB:
13569 // mainDstReg = 0
13570 MIB =
13571 BuildMI(mainMBB, DL,
13572 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13573
13574 // Store IP
13575 if (Subtarget.isPPC64()) {
13576 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13577 .addReg(LabelReg)
13578 .addImm(LabelOffset)
13579 .addReg(BufReg);
13580 } else {
13581 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13582 .addReg(LabelReg)
13583 .addImm(LabelOffset)
13584 .addReg(BufReg);
13585 }
13586 MIB.cloneMemRefs(MI);
13587
13588 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13589 mainMBB->addSuccessor(sinkMBB);
13590
13591 // sinkMBB:
13592 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13593 TII->get(PPC::PHI), DstReg)
13594 .addReg(mainDstReg).addMBB(mainMBB)
13595 .addReg(restoreDstReg).addMBB(thisMBB);
13596
13597 MI.eraseFromParent();
13598 return sinkMBB;
13599}
13600
13601 MachineBasicBlock *
13602 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
13603 MachineBasicBlock *MBB) const {
13604 DebugLoc DL = MI.getDebugLoc();
13605 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13606
13607 MachineFunction *MF = MBB->getParent();
13608 MachineRegisterInfo &MRI = MF->getRegInfo();
13609
13610 MVT PVT = getPointerTy(MF->getDataLayout());
13611 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13612 "Invalid Pointer Size!");
13613
13614 const TargetRegisterClass *RC =
13615 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13616 Register Tmp = MRI.createVirtualRegister(RC);
13617 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13618 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13619 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13620 unsigned BP =
13621 (PVT == MVT::i64)
13622 ? PPC::X30
13623 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13624 : PPC::R30);
13625
13626 MachineInstrBuilder MIB;
13627
13628 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13629 const int64_t SPOffset = 2 * PVT.getStoreSize();
13630 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13631 const int64_t BPOffset = 4 * PVT.getStoreSize();
13632
13633 Register BufReg = MI.getOperand(0).getReg();
13634
13635 // Reload FP (the jumped-to function may not have had a
13636 // frame pointer, and if so, then its r31 will be restored
13637 // as necessary).
13638 if (PVT == MVT::i64) {
13639 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13640 .addImm(0)
13641 .addReg(BufReg);
13642 } else {
13643 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13644 .addImm(0)
13645 .addReg(BufReg);
13646 }
13647 MIB.cloneMemRefs(MI);
13648
13649 // Reload IP
13650 if (PVT == MVT::i64) {
13651 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13652 .addImm(LabelOffset)
13653 .addReg(BufReg);
13654 } else {
13655 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13656 .addImm(LabelOffset)
13657 .addReg(BufReg);
13658 }
13659 MIB.cloneMemRefs(MI);
13660
13661 // Reload SP
13662 if (PVT == MVT::i64) {
13663 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13664 .addImm(SPOffset)
13665 .addReg(BufReg);
13666 } else {
13667 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13668 .addImm(SPOffset)
13669 .addReg(BufReg);
13670 }
13671 MIB.cloneMemRefs(MI);
13672
13673 // Reload BP
13674 if (PVT == MVT::i64) {
13675 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13676 .addImm(BPOffset)
13677 .addReg(BufReg);
13678 } else {
13679 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13680 .addImm(BPOffset)
13681 .addReg(BufReg);
13682 }
13683 MIB.cloneMemRefs(MI);
13684
13685 // Reload TOC
13686 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13687 setUsesTOCBasePtr(*MBB->getParent());
13688 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13689 .addImm(TOCOffset)
13690 .addReg(BufReg)
13691 .cloneMemRefs(MI);
13692 }
13693
13694 // Jump
13695 BuildMI(*MBB, MI, DL,
13696 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13697 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13698
13699 MI.eraseFromParent();
13700 return MBB;
13701}
13702
13703 bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13704 // If the function specifically requests inline stack probes, emit them.
13705 if (MF.getFunction().hasFnAttribute("probe-stack"))
13706 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13707 "inline-asm";
13708 return false;
13709}
13710
13711 unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13712 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13713 unsigned StackAlign = TFI->getStackAlignment();
13714 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13715 "Unexpected stack alignment");
13716 // The default stack probe size is 4096 if the function has no
13717 // stack-probe-size attribute.
13718 const Function &Fn = MF.getFunction();
13719 unsigned StackProbeSize =
13720 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13721 // Round down to the stack alignment.
13722 StackProbeSize &= ~(StackAlign - 1);
13723 return StackProbeSize ? StackProbeSize : StackAlign;
13724}
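// For example, with a 16-byte stack alignment a "stack-probe-size" of 4100
// is rounded down to 4096, while a value smaller than the alignment rounds
// to zero and the stack alignment itself is used instead.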
13725
13726 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
13727 // into three phases. In the first phase, it uses the pseudo instruction
13728 // PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
13729 // FinalStackPtr. In the second phase, it generates a loop that probes blocks.
13730 // Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result of
13731 // MaxCallFrameSize so that it can calculate the correct data area pointer.
13732 MachineBasicBlock *
13733 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13734 MachineBasicBlock *MBB) const {
13735 const bool isPPC64 = Subtarget.isPPC64();
13736 MachineFunction *MF = MBB->getParent();
13737 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13738 DebugLoc DL = MI.getDebugLoc();
13739 const unsigned ProbeSize = getStackProbeSize(*MF);
13740 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13741 MachineRegisterInfo &MRI = MF->getRegInfo();
13742 // The CFG of the stack-probing code looks like:
13743 // +-----+
13744 // | MBB |
13745 // +--+--+
13746 // |
13747 // +----v----+
13748 // +--->+ TestMBB +---+
13749 // | +----+----+ |
13750 // | | |
13751 // | +-----v----+ |
13752 // +---+ BlockMBB | |
13753 // +----------+ |
13754 // |
13755 // +---------+ |
13756 // | TailMBB +<--+
13757 // +---------+
13758 // In MBB, calculate the previous frame pointer and the final stack pointer.
13759 // In TestMBB, test whether sp equals the final stack pointer; if so, jump to
13760 // TailMBB. In BlockMBB, update sp and jump back to TestMBB.
13761 // The code following \p MI is spliced into TailMBB.
13762 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13763 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13764 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13765
13766 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13767 MF->insert(MBBIter, TestMBB);
13768 MF->insert(MBBIter, BlockMBB);
13769 MF->insert(MBBIter, TailMBB);
13770
13771 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13772 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13773
13774 Register DstReg = MI.getOperand(0).getReg();
13775 Register NegSizeReg = MI.getOperand(1).getReg();
13776 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13777 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13778 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13779 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13780
13781 // Since the value of NegSizeReg might be realigned during prolog/epilog
13782 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13783 // actual FramePointer and NegSize.
13784 unsigned ProbeOpc;
13785 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13786 ProbeOpc =
13787 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13788 else
13789 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
13790 // and NegSizeReg will be allocated to the same physical register, avoiding a
13791 // redundant copy when NegSizeReg has only one use (the current MI, which
13792 // will be replaced by PREPARE_PROBED_ALLOCA).
13793 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13794 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13795 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13796 .addDef(ActualNegSizeReg)
13797 .addReg(NegSizeReg)
13798 .add(MI.getOperand(2))
13799 .add(MI.getOperand(3));
13800
13801 // Calculate the final stack pointer, which equals SP + ActualNegSize.
13802 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13803 FinalStackPtr)
13804 .addReg(SPReg)
13805 .addReg(ActualNegSizeReg);
13806
13807 // Materialize a scratch register for update.
13808 int64_t NegProbeSize = -(int64_t)ProbeSize;
13809 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13810 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13811 if (!isInt<16>(NegProbeSize)) {
13812 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13813 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13814 .addImm(NegProbeSize >> 16);
13815 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13816 ScratchReg)
13817 .addReg(TempReg)
13818 .addImm(NegProbeSize & 0xFFFF);
13819 } else
13820 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13821 .addImm(NegProbeSize);
13822
13823 {
13824 // Probe the leading residual part.
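// For example, allocating 10000 bytes with a 4096-byte probe size first
// touches the residual 10000 - 2*4096 = 1808 bytes, leaving a remainder the
// loop below can probe in whole ProbeSize steps.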
13825 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13826 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13827 .addReg(ActualNegSizeReg)
13828 .addReg(ScratchReg);
13829 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13830 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13831 .addReg(Div)
13832 .addReg(ScratchReg);
13833 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13834 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13835 .addReg(Mul)
13836 .addReg(ActualNegSizeReg);
13837 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13838 .addReg(FramePointer)
13839 .addReg(SPReg)
13840 .addReg(NegMod);
13841 }
13842
13843 {
13844 // The remaining part should be a multiple of ProbeSize.
13845 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13846 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13847 .addReg(SPReg)
13848 .addReg(FinalStackPtr);
13849 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13850 .addImm(PPC::PRED_EQ)
13851 .addReg(CmpResult)
13852 .addMBB(TailMBB);
13853 TestMBB->addSuccessor(BlockMBB);
13854 TestMBB->addSuccessor(TailMBB);
13855 }
13856
13857 {
13858 // Touch the block.
13859 // |P...|P...|P...
13860 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13861 .addReg(FramePointer)
13862 .addReg(SPReg)
13863 .addReg(ScratchReg);
13864 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13865 BlockMBB->addSuccessor(TestMBB);
13866 }
13867
13868 // The calculation of MaxCallFrameSize is deferred to prolog/epilog insertion;
13869 // use the DYNAREAOFFSET pseudo instruction to get the future result.
13870 Register MaxCallFrameSizeReg =
13871 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13872 BuildMI(TailMBB, DL,
13873 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13874 MaxCallFrameSizeReg)
13875 .add(MI.getOperand(2))
13876 .add(MI.getOperand(3));
13877 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13878 .addReg(SPReg)
13879 .addReg(MaxCallFrameSizeReg);
13880
13881 // Splice instructions after MI to TailMBB.
13882 TailMBB->splice(TailMBB->end(), MBB,
13883 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13884 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
13885 MBB->addSuccessor(TestMBB);
13886
13887 // Delete the pseudo instruction.
13888 MI.eraseFromParent();
13889
13890 ++NumDynamicAllocaProbed;
13891 return TailMBB;
13892}
13893
13894/// Check if the opcode is a SELECT or SELECT_CC variant.
13895/// @param Opcode The opcode to check
13896/// @param CheckOnlyCC If true, only return true for SELECT_CC variants;
13897/// if false, return true for both SELECT and SELECT_CC
13898static bool IsSelect(unsigned Opcode, bool CheckOnlyCC = false) {
13899 switch (Opcode) {
13900 // SELECT_CC variants - always return true
13901 case PPC::SELECT_CC_I4:
13902 case PPC::SELECT_CC_I8:
13903 case PPC::SELECT_CC_F4:
13904 case PPC::SELECT_CC_F8:
13905 case PPC::SELECT_CC_F16:
13906 case PPC::SELECT_CC_VRRC:
13907 case PPC::SELECT_CC_VSFRC:
13908 case PPC::SELECT_CC_VSSRC:
13909 case PPC::SELECT_CC_VSRC:
13910 case PPC::SELECT_CC_SPE4:
13911 case PPC::SELECT_CC_SPE:
13912 return true;
13913 // SELECT variants - only return true if CheckOnlyCC is false
13914 case PPC::SELECT_I4:
13915 case PPC::SELECT_I8:
13916 case PPC::SELECT_F4:
13917 case PPC::SELECT_F8:
13918 case PPC::SELECT_F16:
13919 case PPC::SELECT_SPE:
13920 case PPC::SELECT_SPE4:
13921 case PPC::SELECT_VRRC:
13922 case PPC::SELECT_VSFRC:
13923 case PPC::SELECT_VSSRC:
13924 case PPC::SELECT_VSRC:
13925 return !CheckOnlyCC; // true if checking all SELECTs, false if only CC
13926 default:
13927 return false;
13928 }
13929}
13930static bool IsSelectCC(unsigned Opcode) { return IsSelect(Opcode, true); }
13931
13932/// Emit SELECT instruction, using ISEL if available, otherwise use
13933/// branch-based control flow.
13934///
13935/// For targets with ISEL support (SELECT_CC_I4/I8, SELECT_I4/I8), this
13936/// generates a single ISEL instruction. Otherwise, it creates a
13937/// branch-based control flow pattern with PHI nodes.
13939 const TargetInstrInfo *TII,
13940 const PPCSubtarget &Subtarget) {
13941 assert(IsSelect(MI.getOpcode()) && "Instruction must be a SELECT variant");
13942
13943 // Check if we can use ISEL for this SELECT
13944 if (Subtarget.hasISEL() &&
13945 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13946 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13947 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13948 SmallVector<MachineOperand, 2> Cond;
13949 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13950 MI.getOpcode() == PPC::SELECT_CC_I8)
13951 Cond.push_back(MI.getOperand(4));
13952 else
13953 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13954 Cond.push_back(MI.getOperand(1));
13955
13956 DebugLoc dl = MI.getDebugLoc();
13957 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13958 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13959 MI.eraseFromParent();
13960 return BB;
13961 }
13962
13963 // Fall back to branch-based SELECT implementation
13964 MachineFunction *F = BB->getParent();
13965 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13966 MachineFunction::iterator It = ++BB->getIterator();
13967 DebugLoc dl = MI.getDebugLoc();
13968
13969 MachineBasicBlock *thisMBB = BB;
13970 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13971 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13972 F->insert(It, copy0MBB);
13973 F->insert(It, sinkMBB);
13974
13975 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
13976 copy0MBB->addLiveIn(PPC::CARRY);
13977 sinkMBB->addLiveIn(PPC::CARRY);
13978 }
13979
13980 // Set the call frame size on entry to the new basic blocks.
13981 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13982 copy0MBB->setCallFrameSize(CallFrameSize);
13983 sinkMBB->setCallFrameSize(CallFrameSize);
13984
13985 // Transfer the remainder of BB and its successor edges to sinkMBB.
13986 sinkMBB->splice(sinkMBB->begin(), BB,
13987 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13988 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13989
13990 // Add successors
13991 BB->addSuccessor(copy0MBB);
13992 BB->addSuccessor(sinkMBB);
13993
13994 // Build branch instruction
13995 if (IsSelectCC(MI.getOpcode()))
13996 BuildMI(BB, dl, TII->get(PPC::BCC))
13997 .addImm(MI.getOperand(4).getImm())
13998 .addReg(MI.getOperand(1).getReg())
13999 .addMBB(sinkMBB);
14000 else
14001 BuildMI(BB, dl, TII->get(PPC::BC))
14002 .addReg(MI.getOperand(1).getReg())
14003 .addMBB(sinkMBB);
14004
14005 // copy0MBB: fallthrough to sinkMBB
14006 BB = copy0MBB;
14007 BB->addSuccessor(sinkMBB);
14008
14009 // sinkMBB: PHI instruction
14010 BB = sinkMBB;
14011 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
14012 .addReg(MI.getOperand(3).getReg())
14013 .addMBB(copy0MBB)
14014 .addReg(MI.getOperand(2).getReg())
14015 .addMBB(thisMBB);
14016 MI.eraseFromParent();
14017 return BB;
14018}
14019
14020/// Helper function to create basic blocks for atomic compare-and-swap.
14021/// Creates three basic blocks (loop1MBB, loop2MBB, exitMBB) and sets up
14022/// the control flow structure common to both hardware and software
14023 /// implementations of atomic compare-and-swap operations.
14024 static void createAtomicLoopBlocks(MachineFunction *F, MachineBasicBlock *BB,
14025 MachineBasicBlock *&loop1MBB,
14026 MachineBasicBlock *&loop2MBB,
14027 MachineBasicBlock *&exitMBB,
14028 MachineInstr &MI,
14029 MachineFunction::iterator It) {
14030 const BasicBlock *LLVM_BB = BB->getBasicBlock();
14031 loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14032 loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14033 exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14034 F->insert(It, loop1MBB);
14035 F->insert(It, loop2MBB);
14036 F->insert(It, exitMBB);
14037 exitMBB->splice(exitMBB->begin(), BB,
14038 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14039 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14040 BB->addSuccessor(loop1MBB);
14041}
14042
14043/// Emit hardware-supported atomic compare-and-swap for I32/I64 and I8/I16
14044/// with partword atomic support.
14045///
14046/// This uses native PowerPC atomic instructions (LBARX/LHARX/LWARX/LDARX for
14047/// load-and-reserve, STBCX/STHCX/STWCX/STDCX for store-conditional) to
14048/// implement atomic compare-and-swap at byte, halfword, word, or doubleword
14049/// granularity.
14050///
14051/// Control flow:
14052/// thisMBB -> loop1MBB -> loop2MBB -> exitMBB
14053/// | |
14054/// +------------+
14055///
14056/// loop1MBB:
14057/// - Load-and-reserve from memory
14058/// - Compare loaded value with expected old value
14059/// - Branch to exitMBB if not equal (CAS failed)
14060/// loop2MBB:
14061/// - Store-conditional new value to memory
14062/// - Branch back to loop1MBB if store failed (retry)
14063/// - Fall through to exitMBB on success
14064static MachineBasicBlock *
14066 const TargetInstrInfo *TII,
14067 const PPCSubtarget &Subtarget) {
14068 MachineFunction *F = BB->getParent();
14069 MachineFunction::iterator It = ++BB->getIterator();
14070
14071 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14072
14073 unsigned LoadMnemonic = PPC::LDARX;
14074 unsigned StoreMnemonic = PPC::STDCX;
14075 switch (MI.getOpcode()) {
14076 default:
14077 llvm_unreachable("Compare and swap of unknown size");
14078 case PPC::ATOMIC_CMP_SWAP_I8:
14079 LoadMnemonic = PPC::LBARX;
14080 StoreMnemonic = PPC::STBCX;
14081 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
14082 break;
14083 case PPC::ATOMIC_CMP_SWAP_I16:
14084 LoadMnemonic = PPC::LHARX;
14085 StoreMnemonic = PPC::STHCX;
14086 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
14087 break;
14088 case PPC::ATOMIC_CMP_SWAP_I32:
14089 LoadMnemonic = PPC::LWARX;
14090 StoreMnemonic = PPC::STWCX;
14091 break;
14092 case PPC::ATOMIC_CMP_SWAP_I64:
14093 LoadMnemonic = PPC::LDARX;
14094 StoreMnemonic = PPC::STDCX;
14095 break;
14096 }
14097
14098 MachineRegisterInfo &RegInfo = F->getRegInfo();
14099 Register dest = MI.getOperand(0).getReg();
14100 Register ptrA = MI.getOperand(1).getReg();
14101 Register ptrB = MI.getOperand(2).getReg();
14102 Register oldval = MI.getOperand(3).getReg();
14103 Register newval = MI.getOperand(4).getReg();
14104 DebugLoc dl = MI.getDebugLoc();
14105
14106 MachineBasicBlock *loop1MBB, *loop2MBB, *exitMBB;
14107 createAtomicLoopBlocks(F, BB, loop1MBB, loop2MBB, exitMBB, MI, It);
14108
14109 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14110
14111 // loop1MBB:
14112 // l[bhwd]arx dest, ptr
14113 // cmp[wd] dest, oldval
14114 // bne- exitBB
14115 BB = loop1MBB;
14116 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14117 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14118 .addReg(dest)
14119 .addReg(oldval);
14120 BuildMI(BB, dl, TII->get(PPC::BCC))
14121 .addImm(PPC::PRED_NE)
14122 .addReg(CrReg)
14123 .addMBB(exitMBB);
14124 BB->addSuccessor(loop2MBB);
14125 BB->addSuccessor(exitMBB);
14126
14127 // loop2MBB:
14128 // st[bhwd]cx. newval, ptr
14129 // bne- loopMBB
14130 // b exitBB
14131 BB = loop2MBB;
14132 BuildMI(BB, dl, TII->get(StoreMnemonic))
14133 .addReg(newval)
14134 .addReg(ptrA)
14135 .addReg(ptrB);
14136 BuildMI(BB, dl, TII->get(PPC::BCC))
14137 .addImm(PPC::PRED_NE)
14138 .addReg(PPC::CR0)
14139 .addMBB(loop1MBB);
14140 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14141 BB->addSuccessor(loop1MBB);
14142 BB->addSuccessor(exitMBB);
14143
14144 return exitMBB;
14145}
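// Editor's sketch (not part of this file): the contract implemented above is
// the same one exposed by the GCC/Clang atomic builtins, and on PowerPC the
// function below compiles to exactly such a load-reserve/compare/
// store-conditional loop. The helper name is hypothetical.
static uint32_t casWordSketch(uint32_t *Ptr, uint32_t Expected,
                              uint32_t Desired) {
  // On failure, the builtin writes the value actually loaded back into
  // Expected, mirroring how 'dest' above holds the loaded value either way.
  __atomic_compare_exchange_n(Ptr, &Expected, Desired, /*weak=*/false,
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return Expected;
}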
14146
14147/// Emit software-emulated atomic compare-and-swap for I8/I16 without
14148/// hardware partword atomic support.
14149///
14150/// This emulates byte/halfword atomic operations using word (32-bit) atomic
14151/// instructions. Since PowerPC atomic instructions work at word granularity,
14152/// we must:
14153/// 1. Align the pointer to a word boundary
14154/// 2. Calculate the bit shift for the target byte/halfword within the word
14155/// 3. Create masks to isolate the target byte/halfword
14156/// 4. Shift old/new values into the correct bit position
14157/// 5. Use LWARX/STWCX on the full word
14158/// 6. Mask and merge to preserve other bytes in the word
14159/// 7. Extract and shift the result back
14160///
14161/// Control flow:
14162/// thisMBB -> loop1MBB -> loop2MBB -> exitMBB
14163/// | |
14164/// +------------+
14165///
14166/// loop1MBB:
14167/// - LWARX: Load-and-reserve full word
14168/// - Mask to extract target byte/halfword
14169/// - Compare with expected old value
14170/// - Branch to exitMBB if not equal (CAS failed)
14171/// loop2MBB:
14172/// - Merge new value with other bytes in the word
14173/// - STWCX: Store-conditional full word
14174/// - Branch back to loop1MBB if store failed (retry)
14175/// - Fall through to exitMBB on success
14176/// exitMBB:
14177/// - Extract and return the loaded value
14178static MachineBasicBlock *
14179 emitAtomicCmpSwapSoftware(MachineInstr &MI, MachineBasicBlock *BB,
14180 const TargetInstrInfo *TII,
14181 const PPCSubtarget &Subtarget) {
14182 MachineFunction *F = BB->getParent();
14183 MachineFunction::iterator It = ++BB->getIterator();
14184
14185 bool is64bit = Subtarget.isPPC64();
14186 bool isLittleEndian = Subtarget.isLittleEndian();
14187 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14188
14189 Register dest = MI.getOperand(0).getReg();
14190 Register ptrA = MI.getOperand(1).getReg();
14191 Register ptrB = MI.getOperand(2).getReg();
14192 Register oldval = MI.getOperand(3).getReg();
14193 Register newval = MI.getOperand(4).getReg();
14194 DebugLoc dl = MI.getDebugLoc();
14195
14196 MachineBasicBlock *loop1MBB, *loop2MBB, *exitMBB;
14197 createAtomicLoopBlocks(F, BB, loop1MBB, loop2MBB, exitMBB, MI, It);
14198
14199 MachineRegisterInfo &RegInfo = F->getRegInfo();
14200 const TargetRegisterClass *RC =
14201 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14202 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14203
14204 // Lambda to create virtual registers
14205 auto createVReg = [&](const TargetRegisterClass *RC) {
14206 return RegInfo.createVirtualRegister(RC);
14207 };
14208
14209 Register PtrReg = createVReg(RC);
14210 Register Shift1Reg = createVReg(GPRC);
14211 Register ShiftReg = isLittleEndian ? Shift1Reg : createVReg(GPRC);
14212 Register NewVal2Reg = createVReg(GPRC);
14213 Register NewVal3Reg = createVReg(GPRC);
14214 Register OldVal2Reg = createVReg(GPRC);
14215 Register OldVal3Reg = createVReg(GPRC);
14216 Register MaskReg = createVReg(GPRC);
14217 Register Mask2Reg = createVReg(GPRC);
14218 Register Mask3Reg = createVReg(GPRC);
14219 Register Tmp2Reg = createVReg(GPRC);
14220 Register Tmp4Reg = createVReg(GPRC);
14221 Register TmpDestReg = createVReg(GPRC);
14222 Register TmpReg = createVReg(GPRC);
14223 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14224 Register CrReg = createVReg(&PPC::CRRCRegClass);
14225
14226 // Compute aligned pointer and shift amount
14227 Register Ptr1Reg;
14228 if (ptrA != ZeroReg) {
14229 Ptr1Reg = createVReg(RC);
14230 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14231 .addReg(ptrA)
14232 .addReg(ptrB);
14233 } else {
14234 Ptr1Reg = ptrB;
14235 }
14236
14237 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14238 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
14239 .addImm(3)
14240 .addImm(27)
14241 .addImm(is8bit ? 28 : 27);
14242 if (!isLittleEndian)
14243 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14244 .addReg(Shift1Reg)
14245 .addImm(is8bit ? 24 : 16);
14246 if (is64bit)
14247 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14248 .addReg(Ptr1Reg)
14249 .addImm(0)
14250 .addImm(61);
14251 else
14252 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14253 .addReg(Ptr1Reg)
14254 .addImm(0)
14255 .addImm(0)
14256 .addImm(29);
14257
14258 // Prepare masked values
14259 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14260 .addReg(newval)
14261 .addReg(ShiftReg);
14262 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14263 .addReg(oldval)
14264 .addReg(ShiftReg);
14265 if (is8bit)
14266 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14267 else {
14268 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14269 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14270 .addReg(Mask3Reg)
14271 .addImm(65535);
14272 }
14273 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14274 .addReg(Mask2Reg)
14275 .addReg(ShiftReg);
14276 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14277 .addReg(NewVal2Reg)
14278 .addReg(MaskReg);
14279 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14280 .addReg(OldVal2Reg)
14281 .addReg(MaskReg);
14282
14283 // loop1MBB:
14284 // lwarx tmpDest, ptr
14285 // and tmp, tmpDest, mask
14286 // cmpw tmp, oldval3
14287 // bne- exitBB
14288 BB = loop1MBB;
14289 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14290 .addReg(ZeroReg)
14291 .addReg(PtrReg);
14292 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14293 .addReg(TmpDestReg)
14294 .addReg(MaskReg);
14295 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg).addReg(TmpReg).addReg(OldVal3Reg);
14296 BuildMI(BB, dl, TII->get(PPC::BCC))
14297 .addImm(PPC::PRED_NE)
14298 .addReg(CrReg)
14299 .addMBB(exitMBB);
14300 BB->addSuccessor(loop2MBB);
14301 BB->addSuccessor(exitMBB);
14302
14303 // loop2MBB:
14304 // andc tmp2, tmpDest, mask
14305 // or tmp4, tmp2, newval3
14306 // stwcx. tmp4, ptr
14307 // bne- loop1MBB
14308 // b exitBB
14309 BB = loop2MBB;
14310 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14311 .addReg(TmpDestReg)
14312 .addReg(MaskReg);
14313 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14314 .addReg(Tmp2Reg)
14315 .addReg(NewVal3Reg);
14316 BuildMI(BB, dl, TII->get(PPC::STWCX))
14317 .addReg(Tmp4Reg)
14318 .addReg(ZeroReg)
14319 .addReg(PtrReg);
14320 BuildMI(BB, dl, TII->get(PPC::BCC))
14321 .addImm(PPC::PRED_NE)
14322 .addReg(PPC::CR0)
14323 .addMBB(loop1MBB);
14324 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14325 BB->addSuccessor(loop1MBB);
14326 BB->addSuccessor(exitMBB);
14327
14328 // exitMBB:
14329 // srw dest, tmpDest, shift
14330 BB = exitMBB;
14331 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14332 .addReg(TmpReg)
14333 .addReg(ShiftReg);
14334
14335 return BB;
14336}
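// Editor's sketch (illustrative only; names are hypothetical): the pointer
// and mask arithmetic above, in scalar form for the 8-bit case. Bits 1:0 of
// the byte address select the byte within the containing word, scaled by 8
// to a bit shift (the RLWINM) and flipped with xor 24 on big-endian targets
// (the XORI); the word address is the byte address with its low two bits
// cleared (the RLDICR/RLWINM).
struct PartwordSlotSketch {
  uintptr_t WordAddr; // PtrReg
  unsigned Shift;     // ShiftReg
  uint32_t Mask;      // MaskReg
};
static PartwordSlotSketch computeByteSlot(uintptr_t Addr, bool LittleEndian) {
  unsigned Shift = (Addr & 3) * 8;
  if (!LittleEndian)
    Shift ^= 24;
  return {Addr & ~uintptr_t(3), Shift, uint32_t(0xFF) << Shift};
}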
14337
14338 MachineBasicBlock *
14339 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
14340 MachineBasicBlock *BB) const {
14341 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
14342
14343 // To "insert" these instructions we actually have to insert their
14344 // control-flow patterns.
14345 const BasicBlock *LLVM_BB = BB->getBasicBlock();
14346 MachineFunction::iterator It = ++BB->getIterator();
14347
14348 MachineFunction *F = BB->getParent();
14349 MachineRegisterInfo &MRI = F->getRegInfo();
14350
14351 // Handle SELECT with ISEL support first (before generic SELECT handling)
14352 if (IsSelect(MI.getOpcode()))
14353 return emitSelect(MI, BB, TII, Subtarget);
14354
14355 switch (MI.getOpcode()) {
14356 case TargetOpcode::STACKMAP:
14357 return emitPatchPoint(MI, BB);
14358 case TargetOpcode::PATCHPOINT:
14359 // Call lowering should have added an r2 operand to indicate a dependence
14360 // on the TOC base pointer value. It can't, however, because there is no
14361 // way to mark the dependence as implicit there, and so the stackmap code
14362 // will confuse it with a regular operand. Instead, add the dependence
14363 // here.
14364 if (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls())
14365 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
14366 return emitPatchPoint(MI, BB);
14367
14368 case PPC::EH_SjLj_SetJmp32:
14369 case PPC::EH_SjLj_SetJmp64:
14370 return emitEHSjLjSetJmp(MI, BB);
14371
14372 case PPC::EH_SjLj_LongJmp32:
14373 case PPC::EH_SjLj_LongJmp64:
14374 return emitEHSjLjLongJmp(MI, BB);
14375
14376 case PPC::ReadTB: {
14377 // To read the 64-bit time-base register on a 32-bit target, we read the
14378 // two halves. Should the counter have wrapped while it was being read, we
14379 // need to try again.
14380 // ...
14381 // readLoop:
14382 // mfspr Rx,TBU # load from TBU
14383 // mfspr Ry,TB # load from TB
14384 // mfspr Rz,TBU # load from TBU
14385 // cmpw crX,Rx,Rz # check if 'old'='new'
14386 // bne readLoop # branch if they're not equal
14387 // ...
14388
14389 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
14390 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
14391 DebugLoc dl = MI.getDebugLoc();
14392 F->insert(It, readMBB);
14393 F->insert(It, sinkMBB);
14394
14395 // Transfer the remainder of BB and its successor edges to sinkMBB.
14396 sinkMBB->splice(sinkMBB->begin(), BB,
14397 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14398 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
14399
14400 BB->addSuccessor(readMBB);
14401 BB = readMBB;
14402
14403 MachineRegisterInfo &RegInfo = F->getRegInfo();
14404 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
14405 Register LoReg = MI.getOperand(0).getReg();
14406 Register HiReg = MI.getOperand(1).getReg();
14407
14408 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
14409 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
14410 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
14411
14412 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14413
14414 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
14415 .addReg(HiReg)
14416 .addReg(ReadAgainReg);
14417 BuildMI(BB, dl, TII->get(PPC::BCC))
14418 .addImm(PPC::PRED_NE)
14419 .addReg(CmpReg)
14420 .addMBB(readMBB);
14421
14422 BB->addSuccessor(readMBB);
14423 BB->addSuccessor(sinkMBB);
14424 break;
14425 }
14426 case PPC::ATOMIC_LOAD_ADD_NOWP:
14427 BB = EmitPartwordAtomicBinary(MI, BB, PPC::ADD4);
14428 break;
14429 case PPC::ATOMIC_LOAD_ADD:
14430 BB = EmitAtomicBinary(MI, BB, PPC::ADD4);
14431 break;
14432 case PPC::ATOMIC_LOAD_ADD_I64:
14433 BB = EmitAtomicBinary(MI, BB, PPC::ADD8);
14434 break;
14435 case PPC::ATOMIC_LOAD_AND_NOWP:
14436 BB = EmitPartwordAtomicBinary(MI, BB, PPC::AND);
14437 break;
14438 case PPC::ATOMIC_LOAD_AND:
14439 BB = EmitAtomicBinary(MI, BB, PPC::AND);
14440 break;
14441 case PPC::ATOMIC_LOAD_AND_I64:
14442 BB = EmitAtomicBinary(MI, BB, PPC::AND8);
14443 break;
14444 case PPC::ATOMIC_LOAD_OR_NOWP:
14445 BB = EmitPartwordAtomicBinary(MI, BB, PPC::OR);
14446 break;
14447 case PPC::ATOMIC_LOAD_OR:
14448 BB = EmitAtomicBinary(MI, BB, PPC::OR);
14449 break;
14450 case PPC::ATOMIC_LOAD_OR_I64:
14451 BB = EmitAtomicBinary(MI, BB, PPC::OR8);
14452 break;
14453 case PPC::ATOMIC_LOAD_XOR_NOWP:
14454 BB = EmitPartwordAtomicBinary(MI, BB, PPC::XOR);
14455 break;
14456 case PPC::ATOMIC_LOAD_XOR:
14457 BB = EmitAtomicBinary(MI, BB, PPC::XOR);
14458 break;
14459 case PPC::ATOMIC_LOAD_XOR_I64:
14460 BB = EmitAtomicBinary(MI, BB, PPC::XOR8);
14461 break;
14462 case PPC::ATOMIC_LOAD_NAND_NOWP:
14463 BB = EmitPartwordAtomicBinary(MI, BB, PPC::NAND);
14464 break;
14465 case PPC::ATOMIC_LOAD_NAND:
14466 BB = EmitAtomicBinary(MI, BB, PPC::NAND);
14467 break;
14468 case PPC::ATOMIC_LOAD_NAND_I64:
14469 BB = EmitAtomicBinary(MI, BB, PPC::NAND8);
14470 break;
14471 case PPC::ATOMIC_LOAD_SUB_NOWP:
14472 BB = EmitPartwordAtomicBinary(MI, BB, PPC::SUBF);
14473 break;
14474 case PPC::ATOMIC_LOAD_SUB:
14475 BB = EmitAtomicBinary(MI, BB, PPC::SUBF);
14476 break;
14477 case PPC::ATOMIC_LOAD_SUB_I64:
14478 BB = EmitAtomicBinary(MI, BB, PPC::SUBF8);
14479 break;
14480 case PPC::ATOMIC_LOAD_MIN_NOWP:
14481 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_LT);
14482 break;
14483 case PPC::ATOMIC_LOAD_MIN:
14484 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_LT);
14485 break;
14486 case PPC::ATOMIC_LOAD_MIN_I64:
14487 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPD, PPC::PRED_LT);
14488 break;
14489 case PPC::ATOMIC_LOAD_MAX_NOWP:
14490 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_GT);
14491 break;
14492 case PPC::ATOMIC_LOAD_MAX:
14493 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_GT);
14494 break;
14495 case PPC::ATOMIC_LOAD_MAX_I64:
14496 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPD, PPC::PRED_GT);
14497 break;
14498 case PPC::ATOMIC_LOAD_UMIN_NOWP:
14499 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_LT);
14500 break;
14501 case PPC::ATOMIC_LOAD_UMIN:
14502 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_LT);
14503 break;
14504 case PPC::ATOMIC_LOAD_UMIN_I64:
14505 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLD, PPC::PRED_LT);
14506 break;
14507 case PPC::ATOMIC_LOAD_UMAX_NOWP:
14508 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_GT);
14509 break;
14510 case PPC::ATOMIC_LOAD_UMAX:
14511 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_GT);
14512 break;
14513 case PPC::ATOMIC_LOAD_UMAX_I64:
14514 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLD, PPC::PRED_GT);
14515 break;
14516 case PPC::ATOMIC_SWAP_NOWP:
14517 BB = EmitPartwordAtomicBinary(MI, BB, 0);
14518 break;
14519 case PPC::ATOMIC_SWAP:
14520 case PPC::ATOMIC_SWAP_I64:
14521 BB = EmitAtomicBinary(MI, BB, 0);
14522 break;
14523 case PPC::ATOMIC_CMP_SWAP_I32:
14524 case PPC::ATOMIC_CMP_SWAP_I64:
14525 case PPC::ATOMIC_CMP_SWAP_I8:
14526 case PPC::ATOMIC_CMP_SWAP_I16: {
14527 // Use hardware-supported atomic operations if available
14528 bool useHardware = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14529 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14530 (Subtarget.hasPartwordAtomics() &&
14531 (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14532 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16));
14533
14534 if (useHardware)
14535 BB = emitAtomicCmpSwapHardware(MI, BB, TII, Subtarget);
14536 else
14537 BB = emitAtomicCmpSwapSoftware(MI, BB, TII, Subtarget);
14538 break;
14539 }
14540 case PPC::FADDrtz: {
14541 // This pseudo performs an FADD with rounding mode temporarily forced
14542 // to round-to-zero. We emit this via custom inserter since the FPSCR
14543 // is not modeled at the SelectionDAG level.
14544 Register Dest = MI.getOperand(0).getReg();
14545 Register Src1 = MI.getOperand(1).getReg();
14546 Register Src2 = MI.getOperand(2).getReg();
14547 DebugLoc dl = MI.getDebugLoc();
14548
14549 MachineRegisterInfo &RegInfo = F->getRegInfo();
14550 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14551
14552 // Save FPSCR value.
14553 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14554
14555 // Set rounding mode to round-to-zero.
14556 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14557 .addImm(31)
14558 .addReg(PPC::RM, RegState::ImplicitDefine);
14559
14560 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14561 .addImm(30)
14562 .addReg(PPC::RM, RegState::ImplicitDefine);
14563
14564 // Perform addition.
14565 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14566 .addReg(Src1)
14567 .addReg(Src2);
14568 if (MI.getFlag(MachineInstr::NoFPExcept))
14569 MIB.setFlag(MachineInstr::NoFPExcept);
14570
14571 // Restore FPSCR value.
14572 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14573 break;
14574 }
14575 case PPC::ANDI_rec_1_EQ_BIT:
14576 case PPC::ANDI_rec_1_GT_BIT:
14577 case PPC::ANDI_rec_1_EQ_BIT8:
14578 case PPC::ANDI_rec_1_GT_BIT8: {
14579 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14580 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14581 ? PPC::ANDI8_rec
14582 : PPC::ANDI_rec;
14583 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14584 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14585
14586 MachineRegisterInfo &RegInfo = F->getRegInfo();
14587 Register Dest = RegInfo.createVirtualRegister(
14588 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14589
14590 DebugLoc Dl = MI.getDebugLoc();
14591 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14592 .addReg(MI.getOperand(1).getReg())
14593 .addImm(1);
14594 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14595 MI.getOperand(0).getReg())
14596 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14597 break;
14598 }
14599 case PPC::TCHECK_RET: {
14600 DebugLoc Dl = MI.getDebugLoc();
14601 MachineRegisterInfo &RegInfo = F->getRegInfo();
14602 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14603 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14604 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14605 MI.getOperand(0).getReg())
14606 .addReg(CRReg);
14607 break;
14608 }
14609 case PPC::TBEGIN_RET: {
14610 DebugLoc Dl = MI.getDebugLoc();
14611 unsigned Imm = MI.getOperand(1).getImm();
14612 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14613 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14614 MI.getOperand(0).getReg())
14615 .addReg(PPC::CR0EQ);
14616 break;
14617 }
14618 case PPC::SETRNDi: {
14619 DebugLoc dl = MI.getDebugLoc();
14620 Register OldFPSCRReg = MI.getOperand(0).getReg();
14621
14622 // Save FPSCR value.
14623 if (MRI.use_empty(OldFPSCRReg))
14624 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14625 else
14626 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14627
14628 // The floating point rounding mode is in bits 62:63 of FPSCR, and has
14629 // the following settings:
14630 // 00 Round to nearest
14631 // 01 Round to 0
14632 // 10 Round to +inf
14633 // 11 Round to -inf
14634
14635 // When the operand is an immediate, use the two least significant bits of
14636 // the immediate to set bits 62:63 of FPSCR.
14637 unsigned Mode = MI.getOperand(1).getImm();
14638 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14639 .addImm(31)
14640 .addReg(PPC::RM, RegState::ImplicitDefine);
14641
14642 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14643 .addImm(30)
14644 .addReg(PPC::RM, RegState::ImplicitDefine);
14645 break;
14646 }
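  // Editor's note: for instance, SETRNDi 2 requests "round to +inf"
  // (RN = 0b10). Mode & 1 == 0 emits mtfsb0 31 to clear FPSCR bit 63, and
  // Mode & 2 != 0 emits mtfsb1 30 to set FPSCR bit 62.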
14647 case PPC::SETRND: {
14648 DebugLoc dl = MI.getDebugLoc();
14649
14650 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14651 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14652 // If the target doesn't have DirectMove, we should use the stack to do the
14653 // conversion, because the target doesn't have instructions like mtvsrd
14654 // or mfvsrd to do this conversion directly.
14655 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14656 if (Subtarget.hasDirectMove()) {
14657 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14658 .addReg(SrcReg);
14659 } else {
14660 // Use stack to do the register copy.
14661 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14662 MachineRegisterInfo &RegInfo = F->getRegInfo();
14663 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14664 if (RC == &PPC::F8RCRegClass) {
14665 // Copy register from F8RCRegClass to G8RCRegClass.
14666 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14667 "Unsupported RegClass.");
14668
14669 StoreOp = PPC::STFD;
14670 LoadOp = PPC::LD;
14671 } else {
14672 // Copy register from G8RCRegClass to F8RCRegClass.
14673 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14674 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14675 "Unsupported RegClass.");
14676 }
14677
14678 MachineFrameInfo &MFI = F->getFrameInfo();
14679 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14680
14681 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14682 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14683 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
14684 MFI.getObjectAlign(FrameIdx));
14685
14686 // Store the SrcReg into the stack.
14687 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14688 .addReg(SrcReg)
14689 .addImm(0)
14690 .addFrameIndex(FrameIdx)
14691 .addMemOperand(MMOStore);
14692
14693 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14694 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14695 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
14696 MFI.getObjectAlign(FrameIdx));
14697
14698 // Load from the stack where SrcReg is stored, and save to DestReg,
14699 // completing the RegClass conversion from RegClass::SrcReg to
14700 // RegClass::DestReg.
14701 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14702 .addImm(0)
14703 .addFrameIndex(FrameIdx)
14704 .addMemOperand(MMOLoad);
14705 }
14706 };
14707
14708 Register OldFPSCRReg = MI.getOperand(0).getReg();
14709
14710 // Save FPSCR value.
14711 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14712
14713 // When the operand is a gprc register, use the two least significant bits of
14714 // the register and the mtfsf instruction to set bits 62:63 of FPSCR.
14715 //
14716 // copy OldFPSCRTmpReg, OldFPSCRReg
14717 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14718 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14719 // copy NewFPSCRReg, NewFPSCRTmpReg
14720 // mtfsf 255, NewFPSCRReg
14721 MachineOperand SrcOp = MI.getOperand(1);
14722 MachineRegisterInfo &RegInfo = F->getRegInfo();
14723 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14724
14725 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14726
14727 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14728 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14729
14730 // The first operand of INSERT_SUBREG should be a register which has
14731 // subregisters; we only care about its RegClass, so we should use an
14732 // IMPLICIT_DEF register.
14733 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14734 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14735 .addReg(ImDefReg)
14736 .add(SrcOp)
14737 .addImm(1);
14738
14739 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14740 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14741 .addReg(OldFPSCRTmpReg)
14742 .addReg(ExtSrcReg)
14743 .addImm(0)
14744 .addImm(62);
14745
14746 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14747 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14748
14749 // The mask 255 means that bits 32:63 of NewFPSCRReg are put into bits 32:63
14750 // of FPSCR.
14751 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14752 .addImm(255)
14753 .addReg(NewFPSCRReg)
14754 .addImm(0)
14755 .addImm(0);
14756 break;
14757 }
14758 case PPC::SETFLM: {
14759 DebugLoc Dl = MI.getDebugLoc();
14760
14761 // Result of setflm is previous FPSCR content, so we need to save it first.
14762 Register OldFPSCRReg = MI.getOperand(0).getReg();
14763 if (MRI.use_empty(OldFPSCRReg))
14764 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14765 else
14766 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14767
14768 // Put bits 32:63 of NewFPSCRReg into FPSCR.
14769 Register NewFPSCRReg = MI.getOperand(1).getReg();
14770 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14771 .addImm(255)
14772 .addReg(NewFPSCRReg)
14773 .addImm(0)
14774 .addImm(0);
14775 break;
14776 }
14777 case PPC::PROBED_ALLOCA_32:
14778 case PPC::PROBED_ALLOCA_64:
14779 return emitProbedAlloca(MI, BB);
14780
14781 case PPC::SPLIT_QUADWORD: {
14782 DebugLoc DL = MI.getDebugLoc();
14783 Register Src = MI.getOperand(2).getReg();
14784 Register Lo = MI.getOperand(0).getReg();
14785 Register Hi = MI.getOperand(1).getReg();
14786 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14787 .addDef(Lo)
14788 .addUse(Src, {}, PPC::sub_gp8_x1);
14789 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14790 .addDef(Hi)
14791 .addUse(Src, {}, PPC::sub_gp8_x0);
14792 break;
14793 }
14794 case PPC::LQX_PSEUDO:
14795 case PPC::STQX_PSEUDO: {
14796 DebugLoc DL = MI.getDebugLoc();
14797 // Ptr is used as the ptr_rc_no_r0 part
14798 // of LQ/STQ's memory operand and holds the sum of RA and RB,
14799 // so it has to be g8rc_and_g8rc_nox0.
14800 Register Ptr =
14801 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14802 Register Val = MI.getOperand(0).getReg();
14803 Register RA = MI.getOperand(1).getReg();
14804 Register RB = MI.getOperand(2).getReg();
14805 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14806 BuildMI(*BB, MI, DL,
14807 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14808 : TII->get(PPC::STQ))
14809 .addReg(Val, getDefRegState(MI.getOpcode() == PPC::LQX_PSEUDO))
14810 .addImm(0)
14811 .addReg(Ptr);
14812 break;
14813 }
14814 case PPC::LWAT_PSEUDO:
14815 case PPC::LDAT_PSEUDO: {
14816 DebugLoc DL = MI.getDebugLoc();
14817 Register DstReg = MI.getOperand(0).getReg();
14818 Register PtrReg = MI.getOperand(1).getReg();
14819 Register ValReg = MI.getOperand(2).getReg();
14820 unsigned FC = MI.getOperand(3).getImm();
14821 bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO;
14822 Register Val64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14823 if (IsLwat)
14824 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::SUBREG_TO_REG), Val64)
14825 .addReg(ValReg)
14826 .addImm(PPC::sub_32);
14827 else
14828 Val64 = ValReg;
14829
14830 Register G8rPair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14831 Register UndefG8r = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14832 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), UndefG8r);
14833 BuildMI(*BB, MI, DL, TII->get(PPC::REG_SEQUENCE), G8rPair)
14834 .addReg(UndefG8r)
14835 .addImm(PPC::sub_gp8_x0)
14836 .addReg(Val64)
14837 .addImm(PPC::sub_gp8_x1);
14838
14839 Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14840 BuildMI(*BB, MI, DL, TII->get(IsLwat ? PPC::LWAT : PPC::LDAT), PairResult)
14841 .addReg(G8rPair)
14842 .addReg(PtrReg)
14843 .addImm(FC);
14844 Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14845 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
14846 .addReg(PairResult, {}, PPC::sub_gp8_x0);
14847 if (IsLwat)
14848 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14849 .addReg(Result64, {}, PPC::sub_32);
14850 else
14851 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14852 .addReg(Result64);
14853 break;
14854 }
14855 case PPC::LWAT_COND_PSEUDO:
14856 case PPC::LDAT_COND_PSEUDO: {
14857 DebugLoc DL = MI.getDebugLoc();
14858 Register DstReg = MI.getOperand(0).getReg();
14859 Register PtrReg = MI.getOperand(1).getReg();
14860 unsigned FC = MI.getOperand(2).getImm();
14861 bool IsLwat_Cond = MI.getOpcode() == PPC::LWAT_COND_PSEUDO;
14862
14863 Register Pair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14864 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Pair);
14865
14866 Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14867 BuildMI(*BB, MI, DL, TII->get(IsLwat_Cond ? PPC::LWAT : PPC::LDAT),
14868 PairResult)
14869 .addReg(Pair)
14870 .addReg(PtrReg)
14871 .addImm(FC);
14872 Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14873 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
14874 .addReg(PairResult, {}, PPC::sub_gp8_x0);
14875 if (IsLwat_Cond)
14876 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14877 .addReg(Result64, {}, PPC::sub_32);
14878 else
14879 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14880 .addReg(Result64);
14881 break;
14882 }
14883 default:
14884 llvm_unreachable("Unexpected instr type to insert");
14885 }
14886
14887 MI.eraseFromParent(); // The pseudo instruction is gone now.
14888 return BB;
14889}
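// Editor's sketch of the ReadTB retry protocol emitted above (illustrative
// only; readTBU()/readTBL() are hypothetical stand-ins for the mfspr 269/268
// reads, not functions that exist in this codebase).
uint32_t readTBU();
uint32_t readTBL();
static uint64_t readTimeBaseSketch() {
  for (;;) {
    uint32_t Hi = readTBU();
    uint32_t Lo = readTBL();
    if (readTBU() == Hi) // TBU unchanged: TB did not carry into TBU
      return (uint64_t(Hi) << 32) | Lo;
  }
}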
14890
14891//===----------------------------------------------------------------------===//
14892// Target Optimization Hooks
14893//===----------------------------------------------------------------------===//
14894
14895static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14896 // For the estimates, convergence is quadratic, so we essentially double the
14897 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14898 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14899 // this is 2^-14. IEEE float has 23 significand bits and double has 52.
14900 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14901 if (VT.getScalarType() == MVT::f64)
14902 RefinementSteps++;
14903 return RefinementSteps;
14904}
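// Editor's sketch checking these constants: each Newton-Raphson iteration
// doubles the number of correct bits, so the step count is the smallest n
// with StartBits * 2^n >= the target precision (24 bits for f32 including
// the implicit bit, 53 for f64).
static int stepsNeededSketch(int StartBits, int TargetBits) {
  int Steps = 0;
  for (int Bits = StartBits; Bits < TargetBits; Bits *= 2)
    ++Steps;
  return Steps; // (5, 24) -> 3, (5, 53) -> 4, (14, 24) -> 1, (14, 53) -> 2
}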
14905
14906SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14907 const DenormalMode &Mode,
14908 SDNodeFlags Flags) const {
14909 // We only have VSX Vector Test for software Square Root.
14910 EVT VT = Op.getValueType();
14911 if (!isTypeLegal(MVT::i1) ||
14912 (VT != MVT::f64 &&
14913 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14914 return TargetLowering::getSqrtInputTest(Op, DAG, Mode, Flags);
14915
14916 SDLoc DL(Op);
14917 // The output register of FTSQRT is CR field.
14918 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op, Flags);
14919 // ftsqrt BF,FRB
14920 // Let e_b be the unbiased exponent of the double-precision
14921 // floating-point operand in register FRB.
14922 // fe_flag is set to 1 if either of the following conditions occurs.
14923 // - The double-precision floating-point operand in register FRB is a zero,
14924 // a NaN, an infinity, or a negative value.
14925 // - e_b is less than or equal to -970.
14926 // Otherwise fe_flag is set to 0.
14927 // Both VSX and non-VSX versions would set the EQ bit in the CR if the number is
14928 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14929 // exponent is less than -970)
14930 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14931 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14932 FTSQRT, SRIdxVal),
14933 0);
14934}
14935
14936SDValue
14937PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14938 SelectionDAG &DAG) const {
14939 // We only have VSX Vector Square Root.
14940 EVT VT = Op.getValueType();
14941 if (VT != MVT::f64 &&
14942 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14943 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
14944
14945 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14946}
14947
14948SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14949 int Enabled, int &RefinementSteps,
14950 bool &UseOneConstNR,
14951 bool Reciprocal) const {
14952 EVT VT = Operand.getValueType();
14953 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14954 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14955 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14956 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14957 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14958 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14959
14960 // The Newton-Raphson computation with a single constant does not provide
14961 // enough accuracy on some CPUs.
14962 UseOneConstNR = !Subtarget.needsTwoConstNR();
14963 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14964 }
14965 return SDValue();
14966}
14967
14968SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14969 int Enabled,
14970 int &RefinementSteps) const {
14971 EVT VT = Operand.getValueType();
14972 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14973 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14974 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14975 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14976 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14977 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14978 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14979 }
14980 return SDValue();
14981}
14982
14983 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14982
14984 // Note: This functionality is used only when arcp is enabled, and
14985 // on cores with reciprocal estimates (which are used when arcp is
14986 // enabled for division), this functionality is redundant with the default
14987 // combiner logic (once the division -> reciprocal/multiply transformation
14988 // has taken place). As a result, this matters more for older cores than for
14989 // newer ones.
14990
14991 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14992 // reciprocal if there are two or more FDIVs (for embedded cores with only
14993 // one FP pipeline) for three or more FDIVs (for generic OOO cores).
14994 switch (Subtarget.getCPUDirective()) {
14995 default:
14996 return 3;
14997 case PPC::DIR_440:
14998 case PPC::DIR_A2:
14999 case PPC::DIR_E500:
15000 case PPC::DIR_E500mc:
15001 case PPC::DIR_E5500:
15002 return 2;
15003 }
15004}
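// Editor's sketch of the rewrite this threshold gates (the combiner performs
// it on the DAG; this scalar form is purely illustrative):
//   x = a / d; y = b / d; z = c / d;   // before: repeated divides by d
static void repeatedDivisorsSketch(float a, float b, float c, float d,
                                   float &x, float &y, float &z) {
  float r = 1.0f / d; // one divide, or a refined reciprocal estimate
  x = a * r;          // the remaining divides become multiplies
  y = b * r;
  z = c * r;
}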
15005
15006// isConsecutiveLSLoc needs to work even if all adds have not yet been
15007// collapsed, and so we need to look through chains of them.
15008 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
15009 int64_t& Offset, SelectionDAG &DAG) {
15010 if (DAG.isBaseWithConstantOffset(Loc)) {
15011 Base = Loc.getOperand(0);
15012 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
15013
15014 // The base might itself be a base plus an offset, and if so, accumulate
15015 // that as well.
15016 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
15017 }
15018}
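// Editor's note: e.g. for Loc = (add (add X, 8), 16) the recursion leaves
// Base = X and accumulates Offset += 24, so addresses can be compared as
// (base, constant offset) pairs even before the adds are collapsed.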
15019
15020 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
15021 unsigned Bytes, int Dist,
15022 SelectionDAG &DAG) {
15023 if (VT.getSizeInBits() / 8 != Bytes)
15024 return false;
15025
15026 SDValue BaseLoc = Base->getBasePtr();
15027 if (Loc.getOpcode() == ISD::FrameIndex) {
15028 if (BaseLoc.getOpcode() != ISD::FrameIndex)
15029 return false;
15030 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
15031 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
15032 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
15033 int FS = MFI.getObjectSize(FI);
15034 int BFS = MFI.getObjectSize(BFI);
15035 if (FS != BFS || FS != (int)Bytes) return false;
15036 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
15037 }
15038
15039 SDValue Base1 = Loc, Base2 = BaseLoc;
15040 int64_t Offset1 = 0, Offset2 = 0;
15041 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
15042 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
15043 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
15044 return true;
15045
15046 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15047 const GlobalValue *GV1 = nullptr;
15048 const GlobalValue *GV2 = nullptr;
15049 Offset1 = 0;
15050 Offset2 = 0;
15051 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
15052 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
15053 if (isGA1 && isGA2 && GV1 == GV2)
15054 return Offset1 == (Offset2 + Dist*Bytes);
15055 return false;
15056}
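// Editor's note: a true result means Loc addresses exactly Dist * Bytes past
// BaseLoc; Dist may be negative, allowing callers to test the slot just
// before a known access as well as the one just after it.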
15057
15058// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
15059// not enforce equality of the chain operands.
15060 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
15061 unsigned Bytes, int Dist,
15062 SelectionDAG &DAG) {
15063 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
15064 EVT VT = LS->getMemoryVT();
15065 SDValue Loc = LS->getBasePtr();
15066 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
15067 }
15068
15069 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
15070 EVT VT;
15071 switch (N->getConstantOperandVal(1)) {
15072 default: return false;
15073 case Intrinsic::ppc_altivec_lvx:
15074 case Intrinsic::ppc_altivec_lvxl:
15075 case Intrinsic::ppc_vsx_lxvw4x:
15076 case Intrinsic::ppc_vsx_lxvw4x_be:
15077 VT = MVT::v4i32;
15078 break;
15079 case Intrinsic::ppc_vsx_lxvd2x:
15080 case Intrinsic::ppc_vsx_lxvd2x_be:
15081 VT = MVT::v2f64;
15082 break;
15083 case Intrinsic::ppc_altivec_lvebx:
15084 VT = MVT::i8;
15085 break;
15086 case Intrinsic::ppc_altivec_lvehx:
15087 VT = MVT::i16;
15088 break;
15089 case Intrinsic::ppc_altivec_lvewx:
15090 VT = MVT::i32;
15091 break;
15092 }
15093
15094 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
15095 }
15096
15097 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
15098 EVT VT;
15099 switch (N->getConstantOperandVal(1)) {
15100 default: return false;
15101 case Intrinsic::ppc_altivec_stvx:
15102 case Intrinsic::ppc_altivec_stvxl:
15103 case Intrinsic::ppc_vsx_stxvw4x:
15104 VT = MVT::v4i32;
15105 break;
15106 case Intrinsic::ppc_vsx_stxvd2x:
15107 VT = MVT::v2f64;
15108 break;
15109 case Intrinsic::ppc_vsx_stxvw4x_be:
15110 VT = MVT::v4i32;
15111 break;
15112 case Intrinsic::ppc_vsx_stxvd2x_be:
15113 VT = MVT::v2f64;
15114 break;
15115 case Intrinsic::ppc_altivec_stvebx:
15116 VT = MVT::i8;
15117 break;
15118 case Intrinsic::ppc_altivec_stvehx:
15119 VT = MVT::i16;
15120 break;
15121 case Intrinsic::ppc_altivec_stvewx:
15122 VT = MVT::i32;
15123 break;
15124 }
15125
15126 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
15127 }
15128
15129 return false;
15130}
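// Editor's note: the differing operand indices above reflect the intrinsic
// node layouts. INTRINSIC_W_CHAIN loads are (chain, id, pointer), putting
// the pointer at operand 2; INTRINSIC_VOID stores are (chain, id, value,
// pointer), putting it at operand 3.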
15131
15132 // Return true if there is a nearby consecutive load to the one provided
15133 // (regardless of alignment). We search up and down the chain, looking through
15134// token factors and other loads (but nothing else). As a result, a true result
15135// indicates that it is safe to create a new consecutive load adjacent to the
15136// load provided.
15137 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
15138 SDValue Chain = LD->getChain();
15139 EVT VT = LD->getMemoryVT();
15140
15141 SmallPtrSet<SDNode *, 16> LoadRoots;
15142 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
15143 SmallPtrSet<SDNode *, 16> Visited;
15144
15145 // First, search up the chain, branching to follow all token-factor operands.
15146 // If we find a consecutive load, then we're done; otherwise, record all
15147 // nodes just above the top-level loads and token factors.
15148 while (!Queue.empty()) {
15149 SDNode *ChainNext = Queue.pop_back_val();
15150 if (!Visited.insert(ChainNext).second)
15151 continue;
15152
15153 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
15154 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
15155 return true;
15156
15157 if (!Visited.count(ChainLD->getChain().getNode()))
15158 Queue.push_back(ChainLD->getChain().getNode());
15159 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
15160 for (const SDUse &O : ChainNext->ops())
15161 if (!Visited.count(O.getNode()))
15162 Queue.push_back(O.getNode());
15163 } else
15164 LoadRoots.insert(ChainNext);
15165 }
15166
15167 // Second, search down the chain, starting from the top-level nodes recorded
15168 // in the first phase. These top-level nodes are the nodes just above all
15169 // loads and token factors. Starting with their uses, recursively look through
15170 // all loads (just the chain uses) and token factors to find a consecutive
15171 // load.
15172 Visited.clear();
15173 Queue.clear();
15174
15175 for (SDNode *I : LoadRoots) {
15176 Queue.push_back(I);
15177
15178 while (!Queue.empty()) {
15179 SDNode *LoadRoot = Queue.pop_back_val();
15180 if (!Visited.insert(LoadRoot).second)
15181 continue;
15182
15183 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
15184 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
15185 return true;
15186
15187 for (SDNode *U : LoadRoot->users())
15188 if (((isa<MemSDNode>(U) &&
15189 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
15190 U->getOpcode() == ISD::TokenFactor) &&
15191 !Visited.count(U))
15192 Queue.push_back(U);
15193 }
15194 }
15195
15196 return false;
15197}
15198
15199/// This function is called when we have proved that a SETCC node can be replaced
15200 /// by subtraction (and other supporting instructions) so that the result of the
15201 /// comparison is kept in a GPR instead of a CR. This function is purely for
15202/// codegen purposes and has some flags to guide the codegen process.
15203static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
15204 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
15205 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15206
15207 // Zero extend the operands to the largest legal integer. Originally, they
15208 // must be of a strictly smaller size.
15209 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
15210 DAG.getConstant(Size, DL, MVT::i32));
15211 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
15212 DAG.getConstant(Size, DL, MVT::i32));
15213
15214 // Swap if needed, depending on the condition code.
15215 if (Swap)
15216 std::swap(Op0, Op1);
15217
15218 // Subtract extended integers.
15219 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
15220
15221 // Move the sign bit to the least significant position and zero out the rest.
15222 // Now the least significant bit carries the result of the original comparison.
15223 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
15224 DAG.getConstant(Size - 1, DL, MVT::i32));
15225 auto Final = Shifted;
15226
15227 // Complement the result if needed, based on the condition code.
15228 if (Complement)
15229 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
15230 DAG.getConstant(1, DL, MVT::i64));
15231
15232 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
15233}
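// Editor's sketch (assuming i32 operands and Size == 64): the SETULT case
// computes the scalar equivalent of the function below; SETUGT additionally
// swaps the operands, SETUGE complements the result, and SETULE does both.
static bool setultViaSubSketch(uint32_t X, uint32_t Y) {
  uint64_t Sub = uint64_t(X) - uint64_t(Y); // zext both operands, subtract
  return (Sub >> 63) != 0; // srl by Size - 1: the borrow lands in the sign bit
}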
15234
15235SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
15236 DAGCombinerInfo &DCI) const {
15237 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15238
15239 SelectionDAG &DAG = DCI.DAG;
15240 SDLoc DL(N);
15241
15242 // The size of the integers being compared has a critical role in the following
15243 // analysis, so we prefer to do this when all types are legal.
15244 if (!DCI.isAfterLegalizeDAG())
15245 return SDValue();
15246
15247 // If all users of SETCC extend its value to a legal integer type,
15248 // then we replace SETCC with a subtraction.
15249 for (const SDNode *U : N->users())
15250 if (U->getOpcode() != ISD::ZERO_EXTEND)
15251 return SDValue();
15252
15253 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15254 auto OpSize = N->getOperand(0).getValueSizeInBits();
15255
15257
15258 if (OpSize < Size) {
15259 switch (CC) {
15260 default: break;
15261 case ISD::SETULT:
15262 return generateEquivalentSub(N, Size, false, false, DL, DAG);
15263 case ISD::SETULE:
15264 return generateEquivalentSub(N, Size, true, true, DL, DAG);
15265 case ISD::SETUGT:
15266 return generateEquivalentSub(N, Size, false, true, DL, DAG);
15267 case ISD::SETUGE:
15268 return generateEquivalentSub(N, Size, true, false, DL, DAG);
15269 }
15270 }
15271
15272 return SDValue();
15273}
15274
15275SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
15276 DAGCombinerInfo &DCI) const {
15277 SelectionDAG &DAG = DCI.DAG;
15278 SDLoc dl(N);
15279
15280 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
15281 // If we're tracking CR bits, we need to be careful that we don't have:
15282 // trunc(binary-ops(zext(x), zext(y)))
15283 // or
15284 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
15285 // such that we're unnecessarily moving things into GPRs when it would be
15286 // better to keep them in CR bits.
15287
15288 // Note that trunc here can be an actual i1 trunc, or can be the effective
15289 // truncation that comes from a setcc or select_cc.
15290 if (N->getOpcode() == ISD::TRUNCATE &&
15291 N->getValueType(0) != MVT::i1)
15292 return SDValue();
15293
15294 if (N->getOperand(0).getValueType() != MVT::i32 &&
15295 N->getOperand(0).getValueType() != MVT::i64)
15296 return SDValue();
15297
15298 if (N->getOpcode() == ISD::SETCC ||
15299 N->getOpcode() == ISD::SELECT_CC) {
15300 // If we're looking at a comparison, then we need to make sure that the
15301 // high bits (all except for the first) don't affect the result.
15302 ISD::CondCode CC =
15303 cast<CondCodeSDNode>(N->getOperand(
15304 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15305 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
15306
15307 if (ISD::isSignedIntSetCC(CC)) {
15308 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
15309 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
15310 return SDValue();
15311 } else if (ISD::isUnsignedIntSetCC(CC)) {
15312 if (!DAG.MaskedValueIsZero(N->getOperand(0),
15313 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
15314 !DAG.MaskedValueIsZero(N->getOperand(1),
15315 APInt::getHighBitsSet(OpBits, OpBits-1)))
15316 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15317 : SDValue());
15318 } else {
15319 // This is neither a signed nor an unsigned comparison; just make sure
15320 // that the high bits are equal.
15321 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
15322 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
15323
15324 // We don't really care about what is known about the first bit (if
15325 // anything), so pretend that it is known zero for both to ensure they can
15326 // be compared as constants.
15327 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
15328 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
15329
15330 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15331 Op1Known.getConstant() != Op2Known.getConstant())
15332 return SDValue();
15333 }
15334 }
15335
15336 // We now know that the higher-order bits are irrelevant; we just need to
15337 // make sure that all of the intermediate operations are bit operations, and
15338 // all inputs are extensions.
15339 if (N->getOperand(0).getOpcode() != ISD::AND &&
15340 N->getOperand(0).getOpcode() != ISD::OR &&
15341 N->getOperand(0).getOpcode() != ISD::XOR &&
15342 N->getOperand(0).getOpcode() != ISD::SELECT &&
15343 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
15344 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
15345 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
15346 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
15347 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
15348 return SDValue();
15349
15350 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15351 N->getOperand(1).getOpcode() != ISD::AND &&
15352 N->getOperand(1).getOpcode() != ISD::OR &&
15353 N->getOperand(1).getOpcode() != ISD::XOR &&
15354 N->getOperand(1).getOpcode() != ISD::SELECT &&
15355 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
15356 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
15357 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
15358 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
15359 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
15360 return SDValue();
15361
15362 SmallVector<SDValue, 4> Inputs;
15363 SmallVector<SDValue, 8> BinOps, PromOps;
15364 SmallPtrSet<SDNode *, 16> Visited;
15365
15366 for (unsigned i = 0; i < 2; ++i) {
15367 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15368 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15369 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15370 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15371 isa<ConstantSDNode>(N->getOperand(i)))
15372 Inputs.push_back(N->getOperand(i));
15373 else
15374 BinOps.push_back(N->getOperand(i));
15375
15376 if (N->getOpcode() == ISD::TRUNCATE)
15377 break;
15378 }
15379
15380 // Visit all inputs and collect all binary operations (and, or, xor and
15381 // select) that are all fed by extensions.
15382 while (!BinOps.empty()) {
15383 SDValue BinOp = BinOps.pop_back_val();
15384
15385 if (!Visited.insert(BinOp.getNode()).second)
15386 continue;
15387
15388 PromOps.push_back(BinOp);
15389
15390 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15391 // The condition of the select is not promoted.
15392 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15393 continue;
15394 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15395 continue;
15396
15397 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15398 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15399 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15400 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15401 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15402 Inputs.push_back(BinOp.getOperand(i));
15403 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15404 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15405 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15406 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15407 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15408 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15409 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15410 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15411 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15412 BinOps.push_back(BinOp.getOperand(i));
15413 } else {
15414 // We have an input that is not an extension or another binary
15415 // operation; we'll abort this transformation.
15416 return SDValue();
15417 }
15418 }
15419 }
15420
15421 // Make sure that this is a self-contained cluster of operations (which
15422 // is not quite the same thing as saying that everything has only one
15423 // use).
15424 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15425 if (isa<ConstantSDNode>(Inputs[i]))
15426 continue;
15427
15428 for (const SDNode *User : Inputs[i].getNode()->users()) {
15429 if (User != N && !Visited.count(User))
15430 return SDValue();
15431
15432 // Make sure that we're not going to promote the non-output-value
15433 // operand(s) or SELECT or SELECT_CC.
15434 // FIXME: Although we could sometimes handle this, and it does occur in
15435 // practice that one of the condition inputs to the select is also one of
15436 // the outputs, we currently can't deal with this.
15437 if (User->getOpcode() == ISD::SELECT) {
15438 if (User->getOperand(0) == Inputs[i])
15439 return SDValue();
15440 } else if (User->getOpcode() == ISD::SELECT_CC) {
15441 if (User->getOperand(0) == Inputs[i] ||
15442 User->getOperand(1) == Inputs[i])
15443 return SDValue();
15444 }
15445 }
15446 }
15447
15448 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15449 for (const SDNode *User : PromOps[i].getNode()->users()) {
15450 if (User != N && !Visited.count(User))
15451 return SDValue();
15452
15453 // Make sure that we're not going to promote the non-output-value
15454 // operand(s) or SELECT or SELECT_CC.
15455 // FIXME: Although we could sometimes handle this, and it does occur in
15456 // practice that one of the condition inputs to the select is also one of
15457 // the outputs, we currently can't deal with this.
15458 if (User->getOpcode() == ISD::SELECT) {
15459 if (User->getOperand(0) == PromOps[i])
15460 return SDValue();
15461 } else if (User->getOpcode() == ISD::SELECT_CC) {
15462 if (User->getOperand(0) == PromOps[i] ||
15463 User->getOperand(1) == PromOps[i])
15464 return SDValue();
15465 }
15466 }
15467 }
15468
15469 // Replace all inputs with the extension operand.
15470 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15471 // Constants may have users outside the cluster of to-be-promoted nodes,
15472 // and so we need to replace those as we do the promotions.
15473 if (isa<ConstantSDNode>(Inputs[i]))
15474 continue;
15475 else
15476 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
15477 }
15478
15479 std::list<HandleSDNode> PromOpHandles;
15480 for (auto &PromOp : PromOps)
15481 PromOpHandles.emplace_back(PromOp);
15482
15483 // Replace all operations (these are all the same, but have a different
15484 // (i1) return type). DAG.getNode will validate that the types of
15485 // a binary operator match, so go through the list in reverse so that
15486 // we've likely promoted both operands first. Any intermediate truncations or
15487 // extensions disappear.
15488 while (!PromOpHandles.empty()) {
15489 SDValue PromOp = PromOpHandles.back().getValue();
15490 PromOpHandles.pop_back();
15491
15492 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15493 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15494 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15495 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15496 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
15497 PromOp.getOperand(0).getValueType() != MVT::i1) {
15498 // The operand is not yet ready (see comment below).
15499 PromOpHandles.emplace_front(PromOp);
15500 continue;
15501 }
15502
15503 SDValue RepValue = PromOp.getOperand(0);
15504 if (isa<ConstantSDNode>(RepValue))
15505 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
15506
15507 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
15508 continue;
15509 }
15510
15511 unsigned C;
15512 switch (PromOp.getOpcode()) {
15513 default: C = 0; break;
15514 case ISD::SELECT: C = 1; break;
15515 case ISD::SELECT_CC: C = 2; break;
15516 }
15517
15518 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15519 PromOp.getOperand(C).getValueType() != MVT::i1) ||
15520 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15521 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
15522 // The to-be-promoted operands of this node have not yet been
15523 // promoted (this should be rare because we're going through the
15524 // list backward, but if one of the operands has several users in
15525 // this cluster of to-be-promoted nodes, it is possible).
15526 PromOpHandles.emplace_front(PromOp);
15527 continue;
15528 }
15529
15530 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15531
15532 // If there are any constant inputs, make sure they're replaced now.
15533 for (unsigned i = 0; i < 2; ++i)
15534 if (isa<ConstantSDNode>(Ops[C+i]))
15535 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
15536
15537 DAG.ReplaceAllUsesOfValueWith(PromOp,
15538 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
15539 }
15540
15541 // Now we're left with the initial truncation itself.
15542 if (N->getOpcode() == ISD::TRUNCATE)
15543 return N->getOperand(0);
15544
15545 // Otherwise, this is a comparison. The operands to be compared have just
15546 // changed type (to i1), but everything else is the same.
15547 return SDValue(N, 0);
15548}
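// Editor's note: the net effect on the motivating pattern is, schematically,
//   trunc(binary-op(zext(a:i1), zext(b:i1))) --> binary-op(a, b) : i1
// so the whole cluster is recomputed directly on i1 (CR bit) values and the
// intermediate zext/trunc pairs disappear.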
15549
15550SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15551 DAGCombinerInfo &DCI) const {
15552 SelectionDAG &DAG = DCI.DAG;
15553 SDLoc dl(N);
15554
15555 // If we're tracking CR bits, we need to be careful that we don't have:
15556 // zext(binary-ops(trunc(x), trunc(y)))
15557 // or
15558 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
15559 // such that we're unnecessarily moving things into CR bits that can more
15560 // efficiently stay in GPRs. Note that if we're not certain that the high
15561 // bits are set as required by the final extension, we still may need to do
15562 // some masking to get the proper behavior.
15563
15564 // This same functionality is important on PPC64 when dealing with
15565 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15566 // the return values of functions. Because it is so similar, it is handled
15567 // here as well.
15568
15569 if (N->getValueType(0) != MVT::i32 &&
15570 N->getValueType(0) != MVT::i64)
15571 return SDValue();
15572
15573 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15574 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15575 return SDValue();
15576
15577 if (N->getOperand(0).getOpcode() != ISD::AND &&
15578 N->getOperand(0).getOpcode() != ISD::OR &&
15579 N->getOperand(0).getOpcode() != ISD::XOR &&
15580 N->getOperand(0).getOpcode() != ISD::SELECT &&
15581 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
15582 return SDValue();
15583
15584 SmallVector<SDValue, 4> Inputs;
15585 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
15586 SmallPtrSet<SDNode *, 16> Visited;
15587
15588 // Visit all inputs and collect all binary operations (and, or, xor and
15589 // select) that are all fed by truncations.
15590 while (!BinOps.empty()) {
15591 SDValue BinOp = BinOps.pop_back_val();
15592
15593 if (!Visited.insert(BinOp.getNode()).second)
15594 continue;
15595
15596 PromOps.push_back(BinOp);
15597
15598 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15599 // The condition of the select is not promoted.
15600 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15601 continue;
15602 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15603 continue;
15604
15605 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15606 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15607 Inputs.push_back(BinOp.getOperand(i));
15608 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15609 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15610 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15611 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15612 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15613 BinOps.push_back(BinOp.getOperand(i));
15614 } else {
15615 // We have an input that is not a truncation or another binary
15616 // operation; we'll abort this transformation.
15617 return SDValue();
15618 }
15619 }
15620 }
15621
15622 // The operands of a select that must be truncated when the select is
15623 // promoted because the operand is actually part of the to-be-promoted set.
15624 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15625
15626 // Make sure that this is a self-contained cluster of operations (which
15627 // is not quite the same thing as saying that everything has only one
15628 // use).
15629 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15630 if (isa<ConstantSDNode>(Inputs[i]))
15631 continue;
15632
15633 for (SDNode *User : Inputs[i].getNode()->users()) {
15634 if (User != N && !Visited.count(User))
15635 return SDValue();
15636
15637 // If we're going to promote the non-output-value operand(s) or SELECT or
15638 // SELECT_CC, record them for truncation.
15639 if (User->getOpcode() == ISD::SELECT) {
15640 if (User->getOperand(0) == Inputs[i])
15641 SelectTruncOp[0].insert(std::make_pair(User,
15642 User->getOperand(0).getValueType()));
15643 } else if (User->getOpcode() == ISD::SELECT_CC) {
15644 if (User->getOperand(0) == Inputs[i])
15645 SelectTruncOp[0].insert(std::make_pair(User,
15646 User->getOperand(0).getValueType()));
15647 if (User->getOperand(1) == Inputs[i])
15648 SelectTruncOp[1].insert(std::make_pair(User,
15649 User->getOperand(1).getValueType()));
15650 }
15651 }
15652 }
15653
15654 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15655 for (SDNode *User : PromOps[i].getNode()->users()) {
15656 if (User != N && !Visited.count(User))
15657 return SDValue();
15658
15659 // If we're going to promote the non-output-value operand(s) or SELECT or
15660 // SELECT_CC, record them for truncation.
15661 if (User->getOpcode() == ISD::SELECT) {
15662 if (User->getOperand(0) == PromOps[i])
15663 SelectTruncOp[0].insert(std::make_pair(User,
15664 User->getOperand(0).getValueType()));
15665 } else if (User->getOpcode() == ISD::SELECT_CC) {
15666 if (User->getOperand(0) == PromOps[i])
15667 SelectTruncOp[0].insert(std::make_pair(User,
15668 User->getOperand(0).getValueType()));
15669 if (User->getOperand(1) == PromOps[i])
15670 SelectTruncOp[1].insert(std::make_pair(User,
15671 User->getOperand(1).getValueType()));
15672 }
15673 }
15674 }
15675
15676 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
15677 bool ReallyNeedsExt = false;
15678 if (N->getOpcode() != ISD::ANY_EXTEND) {
15679 // If all of the inputs are not already sign/zero extended, then
15680 // we'll still need to do that at the end.
15681 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15682 if (isa<ConstantSDNode>(Inputs[i]))
15683 continue;
15684
15685 unsigned OpBits =
15686 Inputs[i].getOperand(0).getValueSizeInBits();
15687 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15688
15689 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15690 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
15691 APInt::getHighBitsSet(OpBits,
15692 OpBits-PromBits))) ||
15693 (N->getOpcode() == ISD::SIGN_EXTEND &&
15694 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
15695 (OpBits-(PromBits-1)))) {
15696 ReallyNeedsExt = true;
15697 break;
15698 }
15699 }
15700 }
15701
15702 // Convert PromOps to handles before doing any RAUW operations, as these
15703 // may CSE with existing nodes, deleting the originals.
15704 std::list<HandleSDNode> PromOpHandles;
15705 for (auto &PromOp : PromOps)
15706 PromOpHandles.emplace_back(PromOp);
15707
15708 // Replace all inputs, either with the truncation operand, or a
15709 // truncation or extension to the final output type.
15710 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15711 // Constant inputs need to be replaced with the to-be-promoted nodes that
15712 // use them because they might have users outside of the cluster of
15713 // promoted nodes.
15714 if (isa<ConstantSDNode>(Inputs[i]))
15715 continue;
15716
15717 SDValue InSrc = Inputs[i].getOperand(0);
15718 if (Inputs[i].getValueType() == N->getValueType(0))
15719 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
15720 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15721 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15722 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
15723 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15724 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15725 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
15726 else
15727 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15728 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
15729 }
15730
15731 // Replace all operations (these are all the same, but have a different
15732 // (promoted) return type). DAG.getNode will validate that the types of
15733 // a binary operator match, so go through the list in reverse so that
15734 // we've likely promoted both operands first.
15735 while (!PromOpHandles.empty()) {
15736 SDValue PromOp = PromOpHandles.back().getValue();
15737 PromOpHandles.pop_back();
15738
15739 unsigned C;
15740 switch (PromOp.getOpcode()) {
15741 default: C = 0; break;
15742 case ISD::SELECT: C = 1; break;
15743 case ISD::SELECT_CC: C = 2; break;
15744 }
15745
15746 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15747 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
15748 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15749 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
15750 // The to-be-promoted operands of this node have not yet been
15751 // promoted (this should be rare because we're going through the
15752 // list backward, but if one of the operands has several users in
15753 // this cluster of to-be-promoted nodes, it is possible).
15754 PromOpHandles.emplace_front(PromOp);
15755 continue;
15756 }
15757
15758 // For SELECT and SELECT_CC nodes, we do a similar check for any
15759 // to-be-promoted comparison inputs.
15760 if (PromOp.getOpcode() == ISD::SELECT ||
15761 PromOp.getOpcode() == ISD::SELECT_CC) {
15762 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
15763 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
15764 (SelectTruncOp[1].count(PromOp.getNode()) &&
15765 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
15766 PromOpHandles.emplace_front(PromOp);
15767 continue;
15768 }
15769 }
15770
15771    SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15772
15773 // If this node has constant inputs, then they'll need to be promoted here.
15774 for (unsigned i = 0; i < 2; ++i) {
15775 if (!isa<ConstantSDNode>(Ops[C+i]))
15776 continue;
15777 if (Ops[C+i].getValueType() == N->getValueType(0))
15778 continue;
15779
15780 if (N->getOpcode() == ISD::SIGN_EXTEND)
15781 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15782 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15783 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15784 else
15785 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15786 }
15787
15788 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15789 // truncate them again to the original value type.
15790 if (PromOp.getOpcode() == ISD::SELECT ||
15791 PromOp.getOpcode() == ISD::SELECT_CC) {
15792 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
15793 if (SI0 != SelectTruncOp[0].end())
15794 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
15795 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
15796 if (SI1 != SelectTruncOp[1].end())
15797 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
15798 }
15799
15800 DAG.ReplaceAllUsesOfValueWith(PromOp,
15801 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
15802 }
15803
15804 // Now we're left with the initial extension itself.
15805 if (!ReallyNeedsExt)
15806 return N->getOperand(0);
15807
15808 // To zero extend, just mask off everything except for the first bit (in the
15809 // i1 case).
15810 if (N->getOpcode() == ISD::ZERO_EXTEND)
15811 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
15812                       DAG.getConstant(APInt::getLowBitsSet(
15813                                         N->getValueSizeInBits(0), PromBits),
15814 dl, N->getValueType(0)));
15815
15816 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15817 "Invalid extension type");
15818 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
15819 SDValue ShiftCst =
15820 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
15821 return DAG.getNode(
15822 ISD::SRA, dl, N->getValueType(0),
15823 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
15824 ShiftCst);
15825}
15826
15827// Check whether the operands of an i128 compare can be converted to v16i8
15828static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS) {
15829
15830 auto isValidForConvert = [](SDValue &Operand) {
15831 if (!Operand.hasOneUse())
15832 return false;
15833
15834 if (Operand.getValueType() != MVT::i128)
15835 return false;
15836
15837 if (Operand.getOpcode() == ISD::Constant)
15838 return true;
15839
15840 auto *LoadNode = dyn_cast<LoadSDNode>(Operand);
15841 if (!LoadNode)
15842 return false;
15843
15844 // If memory operation is volatile, do not perform any
15845 // optimization or transformation. Volatile operations must be preserved
15846 // as written to ensure correct program behavior, so we return an empty
15847 // SDValue to indicate no action.
15848
15849 if (LoadNode->isVolatile())
15850 return false;
15851
15852 // Only combine loads if both use the unindexed addressing mode.
15853 // PowerPC AltiVec/VMX does not support vector loads or stores with
15854 // pre/post-increment addressing. Indexed modes may imply implicit
15855 // pointer updates, which are not compatible with AltiVec vector
15856 // instructions.
15857 if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
15858 return false;
15859
15860 // Only combine loads if both are non-extending loads
15861 // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
15862 // ISD::SEXTLOAD) perform zero or sign extension, which may change the
15863 // loaded value's semantics and are not compatible with vector loads.
15864 if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
15865 return false;
15866
15867 return true;
15868 };
15869
15870 return (isValidForConvert(LHS) && isValidForConvert(RHS));
15871}
15872
15873static SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N,
15874                                               const SDLoc &DL) {
15875
15876 assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15877
15878 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15879 assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
15880         "CC must be ISD::SETNE or ISD::SETEQ");
15881
15882 auto getV16i8Load = [&](const SDValue &Operand) {
15883 if (Operand.getOpcode() == ISD::Constant)
15884 return DAG.getBitcast(MVT::v16i8, Operand);
15885
15886 assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15887
15888 auto *LoadNode = cast<LoadSDNode>(Operand);
15889 SDValue NewLoad =
15890 DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
15891 LoadNode->getBasePtr(), LoadNode->getMemOperand());
15892 DAG.ReplaceAllUsesOfValueWith(Operand.getValue(1), NewLoad.getValue(1));
15893 return NewLoad;
15894 };
15895
15896 // Following code transforms the DAG
15897 // t0: ch,glue = EntryToken
15898 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15899 // t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15900 // undef:i64
15901 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15902  // t5: i128,ch =
15903  //   load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15904  // t6: i1 = setcc t3, t5, setne:ch
15905 //
15906 // ---->
15907 //
15908 // t0: ch,glue = EntryToken
15909 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15910 // t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15911 // undef:i64
15912 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15913 // t5: v16i8,ch =
15914 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15915 // t6: i32 =
15916 // llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15917 // Constant:i32<2>, t3, t5
15918 // t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15919
15920 // Or transforms the DAG
15921 // t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15922 // t8: i1 =
15923 // setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15924 //
15925 // --->
15926 //
15927 // t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15928 // t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15929  // t7: i32 =
15930  //   llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t6, t5
15931
15932 SDValue LHSVec = getV16i8Load(N->getOperand(0));
15933 SDValue RHSVec = getV16i8Load(N->getOperand(1));
15934
15935 SDValue IntrID =
15936 DAG.getConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL, MVT::i32);
15937 SDValue CRSel = DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
15938 SDValue PredResult = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
15939 IntrID, CRSel, LHSVec, RHSVec);
15940 // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15941 // so we need to invert the CC opcode.
15942 return DAG.getSetCC(DL, N->getValueType(0), PredResult,
15943 DAG.getConstant(0, DL, MVT::i32),
15944 CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15945}
15946
15947// Detect whether there is a pattern like (setcc (and X, 1), 0, eq).
15948// If so, return true; otherwise return false.
15949static bool canConvertSETCCToXori(SDNode *N) {
15950  assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15951
15952 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15953 if (CC != ISD::SETEQ)
15954 return false;
15955
15956 SDValue LHS = N->getOperand(0);
15957 SDValue RHS = N->getOperand(1);
15958
15959 // Check the `SDValue &V` is from `and` with `1`.
15960 auto IsAndWithOne = [](SDValue &V) {
15961 if (V.getOpcode() == ISD::AND) {
15962 for (const SDValue &Op : V->ops())
15963 if (auto *C = dyn_cast<ConstantSDNode>(Op))
15964 if (C->isOne())
15965 return true;
15966 }
15967 return false;
15968 };
15969
15970 // Check whether the SETCC compare with zero.
15971 auto IsCompareWithZero = [](SDValue &V) {
15972 if (auto *C = dyn_cast<ConstantSDNode>(V))
15973 if (C->isZero())
15974 return true;
15975 return false;
15976 };
15977
15978 return (IsAndWithOne(LHS) && IsCompareWithZero(RHS)) ||
15979 (IsAndWithOne(RHS) && IsCompareWithZero(LHS));
15980}
15981
15982// Callers must first verify that the `SDNode *N` can be converted to an xori
15983// via `static bool canConvertSETCCToXori(SDNode *N)`; calling this function
15984// without that check may produce incorrect results.
15985static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG) {
15986
15987 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15988 SDValue LHS = N->getOperand(0);
15989 SDValue RHS = N->getOperand(1);
15990 SDLoc DL(N);
15991
15992 [[maybe_unused]] ISD::CondCode CC =
15993 cast<CondCodeSDNode>(N->getOperand(2))->get();
15994 assert((CC == ISD::SETEQ) && "CC must be ISD::SETEQ.");
15995 // Rewrite it as XORI (and X, 1), 1.
15996 auto MakeXor1 = [&](SDValue V) {
15997 EVT VT = V.getValueType();
15998 SDValue One = DAG.getConstant(1, DL, VT);
15999 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, V, One);
16000 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Xor);
16001 };
16002
16003 if (LHS.getOpcode() == ISD::AND && RHS.getOpcode() != ISD::AND)
16004 return MakeXor1(LHS);
16005
16006 if (RHS.getOpcode() == ISD::AND && LHS.getOpcode() != ISD::AND)
16007 return MakeXor1(RHS);
16008
16009 llvm_unreachable("Should not reach here.");
16010}
16011
16012// Match `sext(setcc X, 0, eq)` and turn it into an ADDIC/SUBFE sequence.
16013//
16014// This generates code for:
16015// X == 0 ? -1 : 0
16016//
16017// On pre-ISA 3.1 targets, this is better than the longer CNTLZW/SRWI/NEG
16018// sequence. This is useful for cases like:
16019// uint8_t f(uint8_t x) { return (x == 0) ? -1 : 0; }
16020//
16021// ISA 3.1+ is skipped because those targets can use SETBC.
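// A plausible resulting sequence (illustrative only; register assignment and
// scheduling depend on the surrounding code):
//   addic r4, r3, -1   # CA = (r3 != 0)
//   subfe r3, r4, r4   # r3 = CA - 1 = (r3 == 0) ? -1 : 0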
16022
16023SDValue PPCTargetLowering::combineSignExtendSetCC(SDNode *N,
16024 DAGCombinerInfo &DCI) const {
16025 if (Subtarget.isISA3_1())
16026 return SDValue();
16027
16028 EVT VT = N->getValueType(0);
16029 if (VT != MVT::i32 && VT != MVT::i64)
16030 return SDValue();
16031
16032 SDValue N0 = N->getOperand(0);
16033 if (N0.getOpcode() != ISD::SETCC)
16034 return SDValue();
16035
16036  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
16037  SDValue LHS = N0.getOperand(0);
16038 SDValue RHS = N0.getOperand(1);
16039
16040  // Bail out unless this is sext (setcc x, 0, eq) or sext (setcc 0, x, eq).
16041 if (CC != ISD::SETEQ || (!isNullConstant(LHS) && !isNullConstant(RHS)))
16042 return SDValue();
16043
16044 SDLoc dl(N);
16045 SelectionDAG &DAG = DCI.DAG;
16046  SDValue X = isNullConstant(LHS) ? RHS : LHS;
16047  EVT XVT = X.getValueType(); // The type of x in the setcc x, 0, eq.
16048
16049 if ((XVT == MVT::i64 || VT == MVT::i64) && !Subtarget.isPPC64())
16050 return SDValue();
16051
16052 // On PPC64, i32 carry operations use the full 64-bit XER register,
16053 // so we must use i64 operations to avoid incorrect results.
16054 // Use i64 operations and truncate the result if needed.
16055 if (XVT != MVT::i64 && Subtarget.isPPC64())
16056 // Zero-extend if input type is not 64bits.
16057 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, X);
16058
16059 EVT OpVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
16060
16061 // Generate: SUBFE(ADDC(X, -1)).
16062 SDValue MinusOne = DAG.getAllOnesConstant(dl, OpVT);
16063 SDValue Addc =
16064 DAG.getNode(PPCISD::ADDC, dl, DAG.getVTList(OpVT, MVT::i32), X, MinusOne);
16065 SDValue Carry = Addc.getValue(1);
16066 SDValue Sube = DAG.getNode(PPCISD::SUBE, dl, DAG.getVTList(OpVT, MVT::i32),
16067 Addc, Addc, Carry);
16068
16069 // Truncate back to i32 if we used i64 operations.
16070 if (OpVT == MVT::i64 && VT == MVT::i32)
16071 return DAG.getNode(ISD::TRUNCATE, dl, VT, Sube);
16072
16073 return Sube;
16074}
16075
16076SDValue PPCTargetLowering::combineSetCC(SDNode *N,
16077 DAGCombinerInfo &DCI) const {
16078 assert(N->getOpcode() == ISD::SETCC &&
16079 "Should be called with a SETCC node");
16080
16081 // Check if the pattern (setcc (and X, 1), 0, eq) is present.
16082 // If it is, rewrite it as XORI (and X, 1), 1.
16083  if (canConvertSETCCToXori(N))
16084    return ConvertSETCCToXori(N, DCI.DAG);
16085
16086 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
16087 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
16088 SDValue LHS = N->getOperand(0);
16089 SDValue RHS = N->getOperand(1);
16090
16091 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
16092 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
16093 LHS.hasOneUse())
16094 std::swap(LHS, RHS);
16095
16096 // x == 0-y --> x+y == 0
16097 // x != 0-y --> x+y != 0
16098 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
16099 RHS.hasOneUse()) {
16100 SDLoc DL(N);
16101 SelectionDAG &DAG = DCI.DAG;
16102 EVT VT = N->getValueType(0);
16103 EVT OpVT = LHS.getValueType();
16104 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
16105 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
16106 }
16107
16108 // Optimization: Fold i128 equality/inequality compares of two loads into a
16109 // vectorized compare using vcmpequb.p when Altivec is available.
16110 //
16111 // Rationale:
16112 // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
16113  // On Altivec-capable subtargets, we can instead reinterpret the i128 loads
16114  // as v16i8 vectors and use the Altivec vcmpequb.p instruction to
16115 // perform a full 128-bit equality check in a single vector compare.
16116 //
16117 // Example Result:
16118 // This transformation replaces memcmp(a, b, 16) with two vector loads
16119 // and one vector compare instruction.
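    // As an assumed source-level illustration (not from a testcase):
    //   bool eq16(const void *a, const void *b) {
    //     return memcmp(a, b, 16) == 0;
    //   }
    // can lower to two vector loads feeding a single vcmpequb. predicate
    // compare instead of a chain of scalar operations.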
16120
16121 if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
16122 return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N));
16123 }
16124
16125 return DAGCombineTruncBoolExt(N, DCI);
16126}
16127
16128// Is this an extending load from an f32 to an f64?
16129static bool isFPExtLoad(SDValue Op) {
16130 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
16131 return LD->getExtensionType() == ISD::EXTLOAD &&
16132 Op.getValueType() == MVT::f64;
16133 return false;
16134}
16135
16136/// Reduces the number of fp-to-int conversions when building a vector.
16137///
16138/// If this vector is built out of floating to integer conversions,
16139/// transform it to a vector built out of floating point values followed by a
16140/// single floating to integer conversion of the vector.
16141/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
16142/// becomes (fptosi (build_vector ($A, $B, ...)))
16143SDValue PPCTargetLowering::
16144combineElementTruncationToVectorTruncation(SDNode *N,
16145 DAGCombinerInfo &DCI) const {
16146 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16147 "Should be called with a BUILD_VECTOR node");
16148
16149 SelectionDAG &DAG = DCI.DAG;
16150 SDLoc dl(N);
16151
16152 SDValue FirstInput = N->getOperand(0);
16153 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
16154 "The input operand must be an fp-to-int conversion.");
16155
16156 // This combine happens after legalization so the fp_to_[su]i nodes are
16157  // already converted to PPCISD nodes.
16158 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
16159 if (FirstConversion == PPCISD::FCTIDZ ||
16160 FirstConversion == PPCISD::FCTIDUZ ||
16161 FirstConversion == PPCISD::FCTIWZ ||
16162 FirstConversion == PPCISD::FCTIWUZ) {
16163 bool IsSplat = true;
16164 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
16165 FirstConversion == PPCISD::FCTIWUZ;
16166 EVT SrcVT = FirstInput.getOperand(0).getValueType();
16167    SmallVector<SDValue, 4> Ops;
16168    EVT TargetVT = N->getValueType(0);
16169 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
16170 SDValue NextOp = N->getOperand(i);
16171 if (NextOp.getOpcode() != PPCISD::MFVSR)
16172 return SDValue();
16173 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
16174 if (NextConversion != FirstConversion)
16175 return SDValue();
16176 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
16177 // This is not valid if the input was originally double precision. It is
16178 // also not profitable to do unless this is an extending load in which
16179 // case doing this combine will allow us to combine consecutive loads.
16180 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
16181 return SDValue();
16182 if (N->getOperand(i) != FirstInput)
16183 IsSplat = false;
16184 }
16185
16186 // If this is a splat, we leave it as-is since there will be only a single
16187 // fp-to-int conversion followed by a splat of the integer. This is better
16188 // for 32-bit and smaller ints and neutral for 64-bit ints.
16189 if (IsSplat)
16190 return SDValue();
16191
16192 // Now that we know we have the right type of node, get its operands
16193 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
16194 SDValue In = N->getOperand(i).getOperand(0);
16195 if (Is32Bit) {
16196 // For 32-bit values, we need to add an FP_ROUND node (if we made it
16197 // here, we know that all inputs are extending loads so this is safe).
16198 if (In.isUndef())
16199 Ops.push_back(DAG.getUNDEF(SrcVT));
16200 else {
16201 SDValue Trunc =
16202 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
16203 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
16204 Ops.push_back(Trunc);
16205 }
16206 } else
16207 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
16208 }
16209
16210 unsigned Opcode;
16211 if (FirstConversion == PPCISD::FCTIDZ ||
16212 FirstConversion == PPCISD::FCTIWZ)
16213 Opcode = ISD::FP_TO_SINT;
16214 else
16215 Opcode = ISD::FP_TO_UINT;
16216
16217 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
16218 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
16219 return DAG.getNode(Opcode, dl, TargetVT, BV);
16220 }
16221 return SDValue();
16222}
16223
16224// The LXVKQ instruction loads a VSX vector with a special quadword value
16225// based on an immediate value. This helper method returns the details of the
16226// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
16227// to help generate the LXVKQ instruction and the subsequent shift instruction
16228// required to match the original build vector pattern.
16229
16230// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
16231using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
16232
16233static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
16234
16235 // LXVKQ instruction loads the Quadword value:
16236 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
16237 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
16238 static const uint32_t Uim = 16;
16239
16240 // Check for direct LXVKQ match (no shift needed)
16241 if (FullVal == BasePattern)
16242 return std::make_tuple(Uim, uint8_t{0});
16243
16244 // Check if FullValue is 1 (the result of the base pattern >> 127)
16245 if (FullVal == APInt(128, 1))
16246 return std::make_tuple(Uim, uint8_t{127});
16247
16248 return std::nullopt;
16249}
16250
16251/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
16252/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
16253/// The LXVKQ instruction loads a VSX vector with a special quadword value
16254/// based on an immediate operand. If UIM=0b10000 then LXVKQ loads VSR[32×TX+T]
16255/// with the value
16255/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
16256/// This can be used to inline the build vector constants that have the
16257/// following patterns:
16258///
16259/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
16260/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
16261/// The MSB pattern can be loaded directly using LXVKQ, while the LSB pattern
16262/// is loaded using a combination of splatting and right shift instructions.
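/// For example (illustrative, assuming big-endian element order): the v2i64
/// build vector {0x8000_0000_0000_0000, 0x0} assembles to the MSB-set
/// quadword and is emitted as a single lxvkq, while {0x0, 0x1} (the LSB-set
/// pattern) is emitted as xxspltib 255 followed by vsrq.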
16263
16264SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
16265 SelectionDAG &DAG) const {
16266
16267 assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
16268 "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
16269
16270 // This transformation is only supported if we are loading either a byte,
16271 // halfword, word, or doubleword.
16272 EVT VT = Op.getValueType();
16273 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
16274 VT == MVT::v2i64))
16275 return SDValue();
16276
16277 LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
16278 << VT.getEVTString() << "): ";
16279 Op->dump());
16280
16281 unsigned NumElems = VT.getVectorNumElements();
16282 unsigned ElemBits = VT.getScalarSizeInBits();
16283
16284 bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
16285
16286  // Check for non-constant operands in the build vector.
16287 for (const SDValue &Operand : Op.getNode()->op_values()) {
16288 if (!isa<ConstantSDNode>(Operand))
16289 return SDValue();
16290 }
16291
16292 // Assemble build vector operands as a 128-bit register value
16293 // We need to reconstruct what the 128-bit register pattern would be
16294 // that produces this vector when interpreted with the current endianness
16295 APInt FullVal = APInt::getZero(128);
16296
16297 for (unsigned Index = 0; Index < NumElems; ++Index) {
16298 auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
16299
16300 // Get element value as raw bits (zero-extended)
16301 uint64_t ElemValue = C->getZExtValue();
16302
16303 // Mask to element size to ensure we only get the relevant bits
16304 if (ElemBits < 64)
16305 ElemValue &= ((1ULL << ElemBits) - 1);
16306
16307 // Calculate bit position for this element in the 128-bit register
16308 unsigned BitPos =
16309 (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
16310
16311 // Create APInt for the element value and shift it to correct position
16312 APInt ElemAPInt(128, ElemValue);
16313 ElemAPInt <<= BitPos;
16314
16315 // Place the element value at the correct bit position
16316 FullVal |= ElemAPInt;
16317 }
16318
16319 if (FullVal.isZero() || FullVal.isAllOnes())
16320 return SDValue();
16321
16322 if (auto UIMOpt = getPatternInfo(FullVal)) {
16323 const auto &[Uim, ShiftAmount] = *UIMOpt;
16324 SDLoc Dl(Op);
16325
16326 // Generate LXVKQ instruction if the shift amount is zero.
16327 if (ShiftAmount == 0) {
16328 SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
16329 SDValue LxvkqInstr =
16330 SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
16331      LLVM_DEBUG(llvm::dbgs()
16332                 << "combineBVLoadsSpecialValue: Instruction Emitted ";
16333 LxvkqInstr.dump());
16334 return LxvkqInstr;
16335 }
16336
16337 assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
16338
16339 // The right shifted pattern can be constructed using a combination of
16340 // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
16341 // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
16342 // value 255.
16343 SDValue ShiftAmountVec =
16344 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
16345 DAG.getTargetConstant(255, Dl, MVT::i32)),
16346 0);
16347 // Generate appropriate right shift instruction
16348 SDValue ShiftVec = SDValue(
16349 DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
16350 0);
16351    LLVM_DEBUG(llvm::dbgs()
16352               << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
16353 ShiftVec.dump());
16354 return ShiftVec;
16355 }
16356 // No patterns matched for build vectors.
16357 return SDValue();
16358}
16359
16360/// Reduce the number of loads when building a vector.
16361///
16362/// Building a vector out of multiple loads can be converted to a load
16363/// of the vector type if the loads are consecutive. If the loads are
16364/// consecutive but in descending order, a shuffle is added at the end
16365/// to reorder the vector.
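// For example (a sketch): (build_vector (load a[0]), (load a[1]), (load a[2]),
// (load a[3])) with ascending addresses becomes a single vector load of
// a[0..3]; with descending addresses, the wide load is followed by a
// reversing vector_shuffle.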
16366static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
16367  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16368 "Should be called with a BUILD_VECTOR node");
16369
16370 SDLoc dl(N);
16371
16372  // Return early for non-byte-sized types, as they can't be consecutive.
16373 if (!N->getValueType(0).getVectorElementType().isByteSized())
16374 return SDValue();
16375
16376 bool InputsAreConsecutiveLoads = true;
16377 bool InputsAreReverseConsecutive = true;
16378 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
16379 SDValue FirstInput = N->getOperand(0);
16380 bool IsRoundOfExtLoad = false;
16381 LoadSDNode *FirstLoad = nullptr;
16382
16383 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
16384 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
16385 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
16386 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
16387 }
16388 // Not a build vector of (possibly fp_rounded) loads.
16389 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
16390 N->getNumOperands() == 1)
16391 return SDValue();
16392
16393 if (!IsRoundOfExtLoad)
16394 FirstLoad = cast<LoadSDNode>(FirstInput);
16395
16396  SmallVector<LoadSDNode *, 4> InputLoads;
16397  InputLoads.push_back(FirstLoad);
16398 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
16399 // If any inputs are fp_round(extload), they all must be.
16400 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
16401 return SDValue();
16402
16403 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
16404 N->getOperand(i);
16405 if (NextInput.getOpcode() != ISD::LOAD)
16406 return SDValue();
16407
16408 SDValue PreviousInput =
16409 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
16410 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
16411 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
16412
16413 // If any inputs are fp_round(extload), they all must be.
16414 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
16415 return SDValue();
16416
16417 // We only care about regular loads. The PPC-specific load intrinsics
16418 // will not lead to a merge opportunity.
16419 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
16420 InputsAreConsecutiveLoads = false;
16421 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
16422 InputsAreReverseConsecutive = false;
16423
16424 // Exit early if the loads are neither consecutive nor reverse consecutive.
16425 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
16426 return SDValue();
16427 InputLoads.push_back(LD2);
16428 }
16429
16430 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
16431 "The loads cannot be both consecutive and reverse consecutive.");
16432
16433 SDValue WideLoad;
16434 SDValue ReturnSDVal;
16435 if (InputsAreConsecutiveLoads) {
16436 assert(FirstLoad && "Input needs to be a LoadSDNode.");
16437 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
16438 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16439 FirstLoad->getAlign());
16440 ReturnSDVal = WideLoad;
16441 } else if (InputsAreReverseConsecutive) {
16442 LoadSDNode *LastLoad = InputLoads.back();
16443 assert(LastLoad && "Input needs to be a LoadSDNode.");
16444 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
16445 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
16446 LastLoad->getAlign());
16447    SmallVector<int, 16> Ops;
16448    for (int i = N->getNumOperands() - 1; i >= 0; i--)
16449 Ops.push_back(i);
16450
16451 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
16452 DAG.getUNDEF(N->getValueType(0)), Ops);
16453 } else
16454 return SDValue();
16455
16456 for (auto *LD : InputLoads)
16457 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
16458 return ReturnSDVal;
16459}
16460
16461// This function adds the required vector_shuffle needed to get
16462// the elements of the vector extract in the correct position
16463// as specified by the CorrectElems encoding.
16464static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
16465                                      SDValue Input, uint64_t Elems,
16466 uint64_t CorrectElems) {
16467 SDLoc dl(N);
16468
16469 unsigned NumElems = Input.getValueType().getVectorNumElements();
16470 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16471
16472 // Knowing the element indices being extracted from the original
16473 // vector and the order in which they're being inserted, just put
16474 // them at element indices required for the instruction.
16475 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16476 if (DAG.getDataLayout().isLittleEndian())
16477 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16478 else
16479 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16480 CorrectElems = CorrectElems >> 8;
16481 Elems = Elems >> 8;
16482 }
16483
16484 SDValue Shuffle =
16485 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
16486 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
16487
16488 EVT VT = N->getValueType(0);
16489 SDValue Conv = DAG.getBitcast(VT, Shuffle);
16490
16491 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
16492 Input.getValueType().getVectorElementType(),
16493                               N->getNumOperands());
16494  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
16495 DAG.getValueType(ExtVT));
16496}
16497
16498// Look for build vector patterns where input operands come from sign
16499// extended vector_extract elements of specific indices. If the correct indices
16500// aren't used, add a vector shuffle to fix up the indices and create
16501// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16502// during instruction selection.
16503static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
16504  // This array encodes the indices that the vector sign extend instructions
16505 // extract from when extending from one type to another for both BE and LE.
16506  // The right nibble of each byte corresponds to the LE indices,
16507  // and the left nibble of each byte corresponds to the BE indices.
16508 // For example: 0x3074B8FC byte->word
16509 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16510 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16511 // For example: 0x000070F8 byte->double word
16512 // For LE: the allowed indices are: 0x0,0x8
16513 // For BE: the allowed indices are: 0x7,0xF
16514 uint64_t TargetElems[] = {
16515 0x3074B8FC, // b->w
16516 0x000070F8, // b->d
16517 0x10325476, // h->w
16518 0x00003074, // h->d
16519 0x00001032, // w->d
16520 };
16521
16522 uint64_t Elems = 0;
16523 int Index;
16524 SDValue Input;
16525
16526 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16527 if (!Op)
16528 return false;
16529 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16530 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16531 return false;
16532
16533 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16534 // of the right width.
16535 SDValue Extract = Op.getOperand(0);
16536 if (Extract.getOpcode() == ISD::ANY_EXTEND)
16537 Extract = Extract.getOperand(0);
16538 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16539 return false;
16540
16541    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
16542    if (!ExtOp)
16543 return false;
16544
16545 Index = ExtOp->getZExtValue();
16546 if (Input && Input != Extract.getOperand(0))
16547 return false;
16548
16549 if (!Input)
16550 Input = Extract.getOperand(0);
16551
16552 Elems = Elems << 8;
16553 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
16554 Elems |= Index;
16555
16556 return true;
16557 };
16558
16559  // If the build vector operands aren't sign extended vector extracts
16560 // of the same input vector, then return.
16561 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16562 if (!isSExtOfVecExtract(N->getOperand(i))) {
16563 return SDValue();
16564 }
16565 }
16566
16567 // If the vector extract indices are not correct, add the appropriate
16568 // vector_shuffle.
16569 int TgtElemArrayIdx;
16570 int InputSize = Input.getValueType().getScalarSizeInBits();
16571 int OutputSize = N->getValueType(0).getScalarSizeInBits();
16572 if (InputSize + OutputSize == 40)
16573 TgtElemArrayIdx = 0;
16574 else if (InputSize + OutputSize == 72)
16575 TgtElemArrayIdx = 1;
16576 else if (InputSize + OutputSize == 48)
16577 TgtElemArrayIdx = 2;
16578 else if (InputSize + OutputSize == 80)
16579 TgtElemArrayIdx = 3;
16580 else if (InputSize + OutputSize == 96)
16581 TgtElemArrayIdx = 4;
16582 else
16583 return SDValue();
16584
16585 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16586 CorrectElems = DAG.getDataLayout().isLittleEndian()
16587 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
16588 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
16589 if (Elems != CorrectElems) {
16590 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16591 }
16592
16593 // Regular lowering will catch cases where a shuffle is not needed.
16594 return SDValue();
16595}
16596
16597// Look for the pattern of a load from a narrow width to i128, feeding
16598// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16599// (LXVRZX). This node represents a zero extending load that will be matched
16600// to the Load VSX Vector Rightmost instructions.
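// For example (illustrative): (v1i128 (build_vector (i128 (zextload i32 X))))
// becomes (PPCISD::LXVRZX Chain, X, 32), which can then be matched to lxvrwx.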
16601static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
16602  SDLoc DL(N);
16603
16604 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16605 if (N->getValueType(0) != MVT::v1i128)
16606 return SDValue();
16607
16608 SDValue Operand = N->getOperand(0);
16609 // Proceed with the transformation if the operand to the BUILD_VECTOR
16610 // is a load instruction.
16611 if (Operand.getOpcode() != ISD::LOAD)
16612 return SDValue();
16613
16614 auto *LD = cast<LoadSDNode>(Operand);
16615 EVT MemoryType = LD->getMemoryVT();
16616
16617  // This transformation is only valid if we are loading either a byte,
16618 // halfword, word, or doubleword.
16619 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16620 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16621
16622 // Ensure that the load from the narrow width is being zero extended to i128.
16623 if (!ValidLDType ||
16624 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16625 LD->getExtensionType() != ISD::EXTLOAD))
16626 return SDValue();
16627
16628 SDValue LoadOps[] = {
16629 LD->getChain(), LD->getBasePtr(),
16630 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
16631
16632 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
16633 DAG.getVTList(MVT::v1i128, MVT::Other),
16634 LoadOps, MemoryType, LD->getMemOperand());
16635}
16636
16637SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16638 DAGCombinerInfo &DCI) const {
16639 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16640 "Should be called with a BUILD_VECTOR node");
16641
16642 SelectionDAG &DAG = DCI.DAG;
16643 SDLoc dl(N);
16644
16645 if (!Subtarget.hasVSX())
16646 return SDValue();
16647
16648 // The target independent DAG combiner will leave a build_vector of
16649 // float-to-int conversions intact. We can generate MUCH better code for
16650 // a float-to-int conversion of a vector of floats.
16651 SDValue FirstInput = N->getOperand(0);
16652 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16653 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16654 if (Reduced)
16655 return Reduced;
16656 }
16657
16658 // If we're building a vector out of consecutive loads, just load that
16659 // vector type.
16660 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16661 if (Reduced)
16662 return Reduced;
16663
16664 // If we're building a vector out of extended elements from another vector
16665 // we have P9 vector integer extend instructions. The code assumes legal
16666 // input types (i.e. it can't handle things like v4i16) so do not run before
16667 // legalization.
16668 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16669 Reduced = combineBVOfVecSExt(N, DAG);
16670 if (Reduced)
16671 return Reduced;
16672 }
16673
16674 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
16675 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16676 // is a load from <valid narrow width> to i128.
16677 if (Subtarget.isISA3_1()) {
16678 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16679 if (BVOfZLoad)
16680 return BVOfZLoad;
16681 }
16682
16683 if (N->getValueType(0) != MVT::v2f64)
16684 return SDValue();
16685
16686 // Looking for:
16687 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
16688 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16689 FirstInput.getOpcode() != ISD::UINT_TO_FP)
16690 return SDValue();
16691 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
16692 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
16693 return SDValue();
16694 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
16695 return SDValue();
16696
16697 SDValue Ext1 = FirstInput.getOperand(0);
16698 SDValue Ext2 = N->getOperand(1).getOperand(0);
16699  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16700      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16701    return SDValue();
16702
16703 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16704 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16705 if (!Ext1Op || !Ext2Op)
16706 return SDValue();
16707 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16708 Ext1.getOperand(0) != Ext2.getOperand(0))
16709 return SDValue();
16710
16711 int FirstElem = Ext1Op->getZExtValue();
16712 int SecondElem = Ext2Op->getZExtValue();
16713 int SubvecIdx;
16714 if (FirstElem == 0 && SecondElem == 1)
16715 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16716 else if (FirstElem == 2 && SecondElem == 3)
16717 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16718 else
16719 return SDValue();
16720
16721 SDValue SrcVec = Ext1.getOperand(0);
16722 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16723 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16724 return DAG.getNode(NodeType, dl, MVT::v2f64,
16725 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16726}
16727
16728SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
16729 DAGCombinerInfo &DCI) const {
16730 assert((N->getOpcode() == ISD::SINT_TO_FP ||
16731 N->getOpcode() == ISD::UINT_TO_FP) &&
16732 "Need an int -> FP conversion node here");
16733
16734 if (useSoftFloat() || !Subtarget.has64BitSupport())
16735 return SDValue();
16736
16737 SelectionDAG &DAG = DCI.DAG;
16738 SDLoc dl(N);
16739 SDValue Op(N, 0);
16740
16741 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
16742 // from the hardware.
16743 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
16744 return SDValue();
16745 if (!Op.getOperand(0).getValueType().isSimple())
16746 return SDValue();
16747 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
16748 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
16749 return SDValue();
16750
16751 SDValue FirstOperand(Op.getOperand(0));
16752 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
16753 (FirstOperand.getValueType() == MVT::i8 ||
16754 FirstOperand.getValueType() == MVT::i16);
16755 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
16756 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
16757 bool DstDouble = Op.getValueType() == MVT::f64;
16758 unsigned ConvOp = Signed ?
16759 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
16760 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
16761 SDValue WidthConst =
16762 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
16763 dl, false);
16764 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
16765 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
16766 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
16767 DAG.getVTList(MVT::f64, MVT::Other),
16768 Ops, MVT::i8, LDN->getMemOperand());
16769 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
16770
16771 // For signed conversion, we need to sign-extend the value in the VSR
16772 if (Signed) {
16773 SDValue ExtOps[] = { Ld, WidthConst };
16774 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
16775 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
16776 } else
16777 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
16778 }
16779
16780
16781 // For i32 intermediate values, unfortunately, the conversion functions
16782  // leave the upper 32 bits of the value undefined. Within the set of
16783 // scalar instructions, we have no method for zero- or sign-extending the
16784 // value. Thus, we cannot handle i32 intermediate values here.
16785 if (Op.getOperand(0).getValueType() == MVT::i32)
16786 return SDValue();
16787
16788 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
16789 "UINT_TO_FP is supported only with FPCVT");
16790
16791 // If we have FCFIDS, then use it when converting to single-precision.
16792 // Otherwise, convert to double-precision and then round.
16793 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16794 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
16795 : PPCISD::FCFIDS)
16796 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
16797 : PPCISD::FCFID);
16798 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16799 ? MVT::f32
16800 : MVT::f64;
16801
16802  // If we're converting from a float to an int and back to a float again,
16803 // then we don't need the store/load pair at all.
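  // As an assumed source-level illustration:
  //   double f(double x) { return (double)(long long)x; }
  // can stay entirely in FP registers as fctidz followed by fcfid.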
16804 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
16805 Subtarget.hasFPCVT()) ||
16806 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
16807 SDValue Src = Op.getOperand(0).getOperand(0);
16808 if (Src.getValueType() == MVT::f32) {
16809 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
16810 DCI.AddToWorklist(Src.getNode());
16811 } else if (Src.getValueType() != MVT::f64) {
16812 // Make sure that we don't pick up a ppc_fp128 source value.
16813 return SDValue();
16814 }
16815
16816 unsigned FCTOp =
16817 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
16818 PPCISD::FCTIDUZ;
16819
16820 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
16821 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
16822
16823 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
16824 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
16825 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
16826 DCI.AddToWorklist(FP.getNode());
16827 }
16828
16829 return FP;
16830 }
16831
16832 return SDValue();
16833}
16834
16835// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16836// builtins) into loads with swaps.
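// For example (illustrative): a v4i32 load on little endian becomes an
// lxvd2x (as v2f64) followed by an xxswapd, plus a bitcast back to v4i32.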
16837SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
16838                                              DAGCombinerInfo &DCI) const {
16839 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16840 // load combines.
16841 if (DCI.isBeforeLegalizeOps())
16842 return SDValue();
16843
16844 SelectionDAG &DAG = DCI.DAG;
16845 SDLoc dl(N);
16846 SDValue Chain;
16847 SDValue Base;
16848 MachineMemOperand *MMO;
16849
16850 switch (N->getOpcode()) {
16851 default:
16852 llvm_unreachable("Unexpected opcode for little endian VSX load");
16853 case ISD::LOAD: {
16854    LoadSDNode *LD = cast<LoadSDNode>(N);
16855    Chain = LD->getChain();
16856 Base = LD->getBasePtr();
16857 MMO = LD->getMemOperand();
16858 // If the MMO suggests this isn't a load of a full vector, leave
16859 // things alone. For a built-in, we have to make the change for
16860 // correctness, so if there is a size problem that will be a bug.
16861 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16862 return SDValue();
16863 break;
16864 }
16865  case ISD::INTRINSIC_W_CHAIN: {
16866    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16867    Chain = Intrin->getChain();
16868 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16869 // us what we want. Get operand 2 instead.
16870 Base = Intrin->getOperand(2);
16871 MMO = Intrin->getMemOperand();
16872 break;
16873 }
16874 }
16875
16876 MVT VecTy = N->getValueType(0).getSimpleVT();
16877
16878 SDValue LoadOps[] = { Chain, Base };
16879 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16880 DAG.getVTList(MVT::v2f64, MVT::Other),
16881 LoadOps, MVT::v2f64, MMO);
16882
16883 DCI.AddToWorklist(Load.getNode());
16884 Chain = Load.getValue(1);
16885 SDValue Swap = DAG.getNode(
16886 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16887 DCI.AddToWorklist(Swap.getNode());
16888
16889 // Add a bitcast if the resulting load type doesn't match v2f64.
16890 if (VecTy != MVT::v2f64) {
16891 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16892 DCI.AddToWorklist(N.getNode());
16893 // Package {bitcast value, swap's chain} to match Load's shape.
16894 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16895 N, Swap.getValue(1));
16896 }
16897
16898 return Swap;
16899}
16900
16901// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16902// builtins) into stores with swaps.
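// For example (illustrative): a v4i32 store on little endian becomes a
// bitcast to v2f64, an xxswapd, and an stxvd2x.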
16903SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
16904                                               DAGCombinerInfo &DCI) const {
16905 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16906 // store combines.
16907 if (DCI.isBeforeLegalizeOps())
16908 return SDValue();
16909
16910 SelectionDAG &DAG = DCI.DAG;
16911 SDLoc dl(N);
16912 SDValue Chain;
16913 SDValue Base;
16914 unsigned SrcOpnd;
16915 MachineMemOperand *MMO;
16916
16917 switch (N->getOpcode()) {
16918 default:
16919 llvm_unreachable("Unexpected opcode for little endian VSX store");
16920 case ISD::STORE: {
16921    StoreSDNode *ST = cast<StoreSDNode>(N);
16922    Chain = ST->getChain();
16923 Base = ST->getBasePtr();
16924 MMO = ST->getMemOperand();
16925 SrcOpnd = 1;
16926 // If the MMO suggests this isn't a store of a full vector, leave
16927 // things alone. For a built-in, we have to make the change for
16928 // correctness, so if there is a size problem that will be a bug.
16929 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16930 return SDValue();
16931 break;
16932 }
16933 case ISD::INTRINSIC_VOID: {
16934    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16935    Chain = Intrin->getChain();
16936 // Intrin->getBasePtr() oddly does not get what we want.
16937 Base = Intrin->getOperand(3);
16938 MMO = Intrin->getMemOperand();
16939 SrcOpnd = 2;
16940 break;
16941 }
16942 }
16943
16944 SDValue Src = N->getOperand(SrcOpnd);
16945 MVT VecTy = Src.getValueType().getSimpleVT();
16946
16947 // All stores are done as v2f64 and possible bit cast.
16948 if (VecTy != MVT::v2f64) {
16949 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16950 DCI.AddToWorklist(Src.getNode());
16951 }
16952
16953 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16954 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16955 DCI.AddToWorklist(Swap.getNode());
16956 Chain = Swap.getValue(1);
16957 SDValue StoreOps[] = { Chain, Swap, Base };
16958 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16959 DAG.getVTList(MVT::Other),
16960 StoreOps, VecTy, MMO);
16961 DCI.AddToWorklist(Store.getNode());
16962 return Store;
16963}
16964
16965// Handle DAG combine for STORE (FP_TO_INT F).
16966SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16967 DAGCombinerInfo &DCI) const {
16968 SelectionDAG &DAG = DCI.DAG;
16969 SDLoc dl(N);
16970 unsigned Opcode = N->getOperand(1).getOpcode();
16971 (void)Opcode;
16972 bool Strict = N->getOperand(1)->isStrictFPOpcode();
16973
16974 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16975 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16976 && "Not a FP_TO_INT Instruction!");
16977
16978 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
16979 EVT Op1VT = N->getOperand(1).getValueType();
16980 EVT ResVT = Val.getValueType();
16981
16982 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
16983 return SDValue();
16984
16985 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
16986 bool ValidTypeForStoreFltAsInt =
16987 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16988 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16989
16990 // TODO: Lower conversion from f128 on all VSX targets
16991 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16992 return SDValue();
16993
16994 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16995 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16996 return SDValue();
16997
16998 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
16999
17000 // Set number of bytes being converted.
17001 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
17002 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
17003 DAG.getIntPtrConstant(ByteSize, dl, false),
17004 DAG.getValueType(Op1VT)};
17005
17006 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
17007 DAG.getVTList(MVT::Other), Ops,
17008 cast<StoreSDNode>(N)->getMemoryVT(),
17009 cast<StoreSDNode>(N)->getMemOperand());
17010
17011 return Val;
17012}
17013
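// Returns true if the mask alternates between elements of the two source
// vectors. For example (illustrative, NumElts == 4): <0, 5, 2, 7> alternates
// and is accepted, while <0, 1, 6, 7> is not.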
17014static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
17015 // Check that the source of the element keeps flipping
17016  // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
17017 bool PrevElemFromFirstVec = Mask[0] < NumElts;
17018 for (int i = 1, e = Mask.size(); i < e; i++) {
17019 if (PrevElemFromFirstVec && Mask[i] < NumElts)
17020 return false;
17021 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
17022 return false;
17023 PrevElemFromFirstVec = !PrevElemFromFirstVec;
17024 }
17025 return true;
17026}
17027
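// Returns true if this is a BUILD_VECTOR whose defined operands are all the
// same value; undef operands are permitted alongside the splat value.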
17028static bool isSplatBV(SDValue Op) {
17029 if (Op.getOpcode() != ISD::BUILD_VECTOR)
17030 return false;
17031 SDValue FirstOp;
17032
17033 // Find first non-undef input.
17034 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
17035 FirstOp = Op.getOperand(i);
17036 if (!FirstOp.isUndef())
17037 break;
17038 }
17039
17040 // All inputs are undef or the same as the first non-undef input.
17041 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
17042 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
17043 return false;
17044 return true;
17045}
17046
17047static SDValue isScalarToVec(SDValue Op) {
17048  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
17049 return Op;
17050 if (Op.getOpcode() != ISD::BITCAST)
17051 return SDValue();
17052 Op = Op.getOperand(0);
17053 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
17054 return Op;
17055 return SDValue();
17056}
17057
17058// Fix up the shuffle mask to account for the fact that the result of
17059// scalar_to_vector is not in lane zero. This just takes all values in
17060// the ranges specified by the min/max indices and adds the number of
17061// elements required to ensure each element comes from the respective
17062// position in the valid lane.
17063// On little endian, that's just the corresponding element in the other
17064// half of the vector. On big endian, it is in the same half but right
17065// justified rather than left justified in that half.
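// For example (illustrative, a v4i32 shuffle on little endian, HalfVec == 2):
// mask indices that refer to the scalar_to_vector result are shifted up by
// HalfVec so that they select the element from the valid (permuted) lane.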
17066static void fixupShuffleMaskForPermutedSToV(
17067    SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
17068 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
17069 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
17070 int LHSEltFixup =
17071 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
17072 int RHSEltFixup =
17073 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
17074 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
17075 int Idx = ShuffV[I];
17076 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
17077 ShuffV[I] += LHSEltFixup;
17078 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
17079 ShuffV[I] += RHSEltFixup;
17080 }
17081}
17082
17083// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
17084// the original is:
17085// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
17086// In such a case, just change the shuffle mask to extract the element
17087// from the permuted index.
17088static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
17089                               const PPCSubtarget &Subtarget) {
17090 SDLoc dl(OrigSToV);
17091 EVT VT = OrigSToV.getValueType();
17092 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17093 "Expecting a SCALAR_TO_VECTOR here");
17094 SDValue Input = OrigSToV.getOperand(0);
17095
17096 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17097 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
17098 SDValue OrigVector = Input.getOperand(0);
17099
17100 // Can't handle non-const element indices or different vector types
17101 // for the input to the extract and the output of the scalar_to_vector.
17102 if (Idx && VT == OrigVector.getValueType()) {
17103 unsigned NumElts = VT.getVectorNumElements();
17104 assert(
17105 NumElts > 1 &&
17106 "Cannot produce a permuted scalar_to_vector for one element vector");
17107 SmallVector<int, 16> NewMask(NumElts, -1);
17108 unsigned ResultInElt = NumElts / 2;
17109 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
17110 NewMask[ResultInElt] = Idx->getZExtValue();
17111 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
17112 }
17113 }
17114 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
17115 OrigSToV.getOperand(0));
17116}
17117
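// Verify that no mask entry indexes past the last defined element of a
// permuted scalar_to_vector input (a last-element value of -1 disables the
// check for that input).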
17118 static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
17119 int HalfVec, int LHSLastElementDefined,
17120 int RHSLastElementDefined) {
17121 for (int Index : ShuffV) {
17122 if (Index < 0) // Skip explicitly undefined mask indices.
17123 continue;
17124 // Handle first input vector of the vector_shuffle.
17125 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
17126 (Index > LHSLastElementDefined))
17127 return false;
17128 // Handle second input vector of the vector_shuffle.
17129 if ((RHSLastElementDefined >= 0) &&
17130 (Index > HalfVec + RHSLastElementDefined))
17131 return false;
17132 }
17133 return true;
17134}
17135
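// Produce the permuted form of one scalar_to_vector shuffle input and
// compute, via the NumValidElts and LastElt out-parameters, how many
// shuffle elements that input actually defines.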
17136 static SDValue generateSToVPermutedForVecShuffle(
17137 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
17138 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
17139 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
17140 EVT VecShuffOperandType = VecShuffOperand.getValueType();
17141 // Set up the values for the shuffle vector fixup.
17142 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
17143 // The last element depends on if the input comes from the LHS or RHS.
17144 //
17145 // For example:
17146 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
17147 //
17148 // For the LHS: The last element that comes from the LHS is actually 0, not 3
17149 // because elements 1 and higher of a scalar_to_vector are undefined.
17150 // For the RHS: The last element that comes from the RHS is actually 5, not 7
17151 // because elements 1 and higher of a scalar_to_vector are undefined.
17152 // It is also not 4 because the original scalar_to_vector is wider and
17153 // actually contains two i32 elements.
17154 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
17155 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
17156 : FirstElt;
17157 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
17158 if (SToVPermuted.getValueType() != VecShuffOperandType)
17159 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
17160 return SToVPermuted;
17161}
17162
17163// On little endian subtargets, combine shuffles such as:
17164// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
17165// into:
17166// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
17167// because the latter can be matched to a single instruction merge.
17168// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
17169// to put the value into element zero. Adjust the shuffle mask so that the
17170// vector can remain in permuted form (to prevent a swap prior to a shuffle).
17171// On big endian targets, this is still useful for SCALAR_TO_VECTOR
17172// nodes with elements smaller than doubleword because all the ways
17173// of getting scalar data into a vector register put the value in the
17174// rightmost element of the left half of the vector.
17175SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
17176 SelectionDAG &DAG) const {
17177 SDValue LHS = SVN->getOperand(0);
17178 SDValue RHS = SVN->getOperand(1);
17179 auto Mask = SVN->getMask();
17180 int NumElts = LHS.getValueType().getVectorNumElements();
17181 SDValue Res(SVN, 0);
17182 SDLoc dl(SVN);
17183 bool IsLittleEndian = Subtarget.isLittleEndian();
17184
17185 // On big endian targets this is only useful for subtargets with direct moves.
17186 // On little endian targets it would be useful for all subtargets with VSX.
17187 // However, adding special handling for LE subtargets without direct moves
17188 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
17189 // which includes direct moves.
17190 if (!Subtarget.hasDirectMove())
17191 return Res;
17192
17193 // If this is not a shuffle of a shuffle and the first element comes from
17194 // the second vector, canonicalize to the commuted form. This will make it
17195 // more likely to match one of the single instruction patterns.
17196 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
17197 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
17198 std::swap(LHS, RHS);
17199 Res = DAG.getCommutedVectorShuffle(*SVN);
17200
17201 if (!isa<ShuffleVectorSDNode>(Res))
17202 return Res;
17203
17204 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
17205 }
17206
17207 // Adjust the shuffle mask if either input vector comes from a
17208 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
17209 // form (to prevent the need for a swap).
17210 SmallVector<int, 16> ShuffV(Mask);
17211 SDValue SToVLHS = isScalarToVec(LHS);
17212 SDValue SToVRHS = isScalarToVec(RHS);
17213 if (SToVLHS || SToVRHS) {
17214 EVT VT = SVN->getValueType(0);
17215 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
17216 int ShuffleNumElts = ShuffV.size();
17217 int HalfVec = ShuffleNumElts / 2;
17218 // The width of the "valid lane" (i.e. the lane that contains the value that
17219 // is vectorized) needs to be expressed in terms of the number of elements
17220 // of the shuffle. It is thereby the ratio of the values before and after
17221 // any bitcast, which will be set later on if the LHS or RHS are
17222 // SCALAR_TO_VECTOR nodes.
17223 unsigned LHSNumValidElts = HalfVec;
17224 unsigned RHSNumValidElts = HalfVec;
17225
17226 // Initially assume that neither input is permuted. These will be adjusted
17227 // accordingly if either input is. Note that -1 means that all elements
17228 // are undefined.
17229 int LHSFirstElt = 0;
17230 int RHSFirstElt = ShuffleNumElts;
17231 int LHSLastElt = -1;
17232 int RHSLastElt = -1;
17233
17234 // Get the permuted scalar to vector nodes for the source(s) that come from
17235 // ISD::SCALAR_TO_VECTOR.
17236 // On big endian systems, this only makes sense for element sizes smaller
17237 // than 64 bits since for 64-bit elements, all instructions already put
17238 // the value into element zero. Since scalar size of LHS and RHS may differ
17239 // after isScalarToVec, this should be checked using their own sizes.
17240 int LHSScalarSize = 0;
17241 int RHSScalarSize = 0;
17242 if (SToVLHS) {
17243 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
17244 if (!IsLittleEndian && LHSScalarSize >= 64)
17245 return Res;
17246 }
17247 if (SToVRHS) {
17248 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
17249 if (!IsLittleEndian && RHSScalarSize >= 64)
17250 return Res;
17251 }
17252 if (LHSScalarSize != 0)
17253 LHS = generateSToVPermutedForVecShuffle(
17254 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
17255 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
17256 if (RHSScalarSize != 0)
17257 RHS = generateSToVPermutedForVecShuffle(
17258 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
17259 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
17260
17261 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
17262 return Res;
17263
17264 // Fix up the shuffle mask to reflect where the desired element actually is.
17265 // The minimum and maximum indices that correspond to element zero for both
17266 // the LHS and RHS are computed and will control which shuffle mask entries
17267 // are to be changed. For example, if the RHS is permuted, any shuffle mask
17268 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
17269 fixupShuffleMaskForPermutedSToV(
17270 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
17271 LHSNumValidElts, RHSNumValidElts, Subtarget);
17272 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17273
17274 // We may have simplified away the shuffle. We won't be able to do anything
17275 // further with it here.
17276 if (!isa<ShuffleVectorSDNode>(Res))
17277 return Res;
17278 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
17279 }
17280
17281 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
17282 // The common case after we commuted the shuffle is that the RHS is a splat
17283 // and we have elements coming in from the splat at indices that are not
17284 // conducive to using a merge.
17285 // Example:
17286 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
17287 if (!isSplatBV(TheSplat))
17288 return Res;
17289
17290 // We are looking for a mask such that all even elements are from
17291 // one vector and all odd elements from the other.
17292 if (!isAlternatingShuffMask(Mask, NumElts))
17293 return Res;
17294
17295 // Adjust the mask so we are pulling in the same index from the splat
17296 // as the index from the interesting vector in consecutive elements.
17297 if (IsLittleEndian) {
17298 // Example (even elements from first vector):
17299 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
17300 if (Mask[0] < NumElts)
17301 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17302 if (ShuffV[i] < 0)
17303 continue;
17304 // If element from non-splat is undef, pick first element from splat.
17305 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
17306 }
17307 // Example (odd elements from first vector):
17308 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
17309 else
17310 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17311 if (ShuffV[i] < 0)
17312 continue;
17313 // If element from non-splat is undef, pick first element from splat.
17314 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
17315 }
17316 } else {
17317 // Example (even elements from first vector):
17318 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
17319 if (Mask[0] < NumElts)
17320 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17321 if (ShuffV[i] < 0)
17322 continue;
17323 // If element from non-splat is undef, pick first element from splat.
17324 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
17325 }
17326 // Example (odd elements from first vector):
17327 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
17328 else
17329 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17330 if (ShuffV[i] < 0)
17331 continue;
17332 // If element from non-splat is undef, pick first element from splat.
17333 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
17334 }
17335 }
17336
17337 // If the RHS has undefs, we need to remove them since we may have created
17338 // a shuffle that adds those instead of the splat value.
17339 SDValue SplatVal =
17340 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
17341 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
17342
17343 if (IsLittleEndian)
17344 RHS = TheSplat;
17345 else
17346 LHS = TheSplat;
17347 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17348}
17349
17350SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
17351 LSBaseSDNode *LSBase,
17352 DAGCombinerInfo &DCI) const {
17353 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
17354 "Not a reverse memop pattern!");
17355
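// A shuffle reverses its element order iff its mask is
// <NumElts-1, NumElts-2, ..., 1, 0>, i.e. walking the mask backwards
// yields 0, 1, 2, ...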
17356 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
17357 auto Mask = SVN->getMask();
17358 int i = 0;
17359 auto I = Mask.rbegin();
17360 auto E = Mask.rend();
17361
17362 for (; I != E; ++I) {
17363 if (*I != i)
17364 return false;
17365 i++;
17366 }
17367 return true;
17368 };
17369
17370 SelectionDAG &DAG = DCI.DAG;
17371 EVT VT = SVN->getValueType(0);
17372
17373 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
17374 return SDValue();
17375
17376 // Before P9, we have the PPCVSXSwapRemoval pass to fix up the element order.
17377 // See the comment in PPCVSXSwapRemoval.cpp.
17378 // This transformation conflicts with that pass, so we don't do it.
17379 if (!Subtarget.hasP9Vector())
17380 return SDValue();
17381
17382 if (!IsElementReverse(SVN))
17383 return SDValue();
17384
17385 if (LSBase->getOpcode() == ISD::LOAD) {
17386 // If result 0 of the load has any user other than the
17387 // shufflevector instruction, it is not profitable to replace the
17388 // shufflevector with a reverse load.
17389 for (SDUse &Use : LSBase->uses())
17390 if (Use.getResNo() == 0 &&
17391 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
17392 return SDValue();
17393
17394 SDLoc dl(LSBase);
17395 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
17396 return DAG.getMemIntrinsicNode(
17397 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
17398 LSBase->getMemoryVT(), LSBase->getMemOperand());
17399 }
17400
17401 if (LSBase->getOpcode() == ISD::STORE) {
17402 // If there are other uses of the shuffle, the swap cannot be avoided.
17403 // Forcing the use of an X-Form (since swapped stores only have
17404 // X-Forms) without removing the swap is unprofitable.
17405 if (!SVN->hasOneUse())
17406 return SDValue();
17407
17408 SDLoc dl(LSBase);
17409 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
17410 LSBase->getBasePtr()};
17411 return DAG.getMemIntrinsicNode(
17412 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
17413 LSBase->getMemoryVT(), LSBase->getMemOperand());
17414 }
17415
17416 llvm_unreachable("Expected a load or store node here");
17417}
17418
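// Map a st[bhwd]cx. store-conditional intrinsic to the width in bytes of
// its store, returning false if the intrinsic is not a store conditional.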
17419static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
17420 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
17421 if (IntrinsicID == Intrinsic::ppc_stdcx)
17422 StoreWidth = 8;
17423 else if (IntrinsicID == Intrinsic::ppc_stwcx)
17424 StoreWidth = 4;
17425 else if (IntrinsicID == Intrinsic::ppc_sthcx)
17426 StoreWidth = 2;
17427 else if (IntrinsicID == Intrinsic::ppc_stbcx)
17428 StoreWidth = 1;
17429 else
17430 return false;
17431 return true;
17432}
17433
17434 static SDValue DAGCombineAddc(SDNode *N,
17435 llvm::PPCTargetLowering::DAGCombinerInfo &DCI) {
17436 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
17437 // (ADDC (ADDE 0, 0, C), -1) -> C
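// (ADDE 0, 0, C) materializes the carry C as a 0/1 value, and adding -1 to
// that value produces a carry-out exactly when the value is 1, so the
// carry-out of the ADDC is C itself.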
17438 SDValue LHS = N->getOperand(0);
17439 SDValue RHS = N->getOperand(1);
17440 if (LHS->getOpcode() == PPCISD::ADDE &&
17441 isNullConstant(LHS->getOperand(0)) &&
17442 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
17443 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
17444 }
17445 }
17446 return SDValue();
17447}
17448
17449/// Optimize the bitfloor(X) pattern for PowerPC.
17450/// Transforms: select_cc X, 0, 0, (srl MinSignedValue, (ctlz X)), seteq
17451/// Into: srl MinSignedValue, (ctlz X)
17452///
17453/// This is safe on PowerPC because the srw instruction returns 0 when the
17454/// shift amount is == bitwidth, which matches the behavior we need for X=0.
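/// For example, for i32: bitfloor(10) = 0x80000000 >> ctlz(10) = 8, and
/// bitfloor(0) = 0x80000000 >> 32 = 0 under the PPCISD::SRL semantics.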
17455 static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
17456 if (N->getOpcode() != ISD::SELECT_CC)
17457 return SDValue();
17458
17459 // SELECT_CC operands: LHS, RHS, TrueVal, FalseVal, CC
17460 SDValue CmpLHS = N->getOperand(0);
17461 SDValue CmpRHS = N->getOperand(1);
17462 SDValue TrueVal = N->getOperand(2);
17463 SDValue FalseVal = N->getOperand(3);
17464 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
17465
17466 // Check if condition is (X == 0)
17467 if (CC != ISD::SETEQ || !isNullConstant(CmpRHS))
17468 return SDValue();
17469
17470 // Check if TrueVal is constant 0
17471 if (!isNullConstant(TrueVal))
17472 return SDValue();
17473
17474 // This combine is replacing a select_cc with a PPC srl, not an srl with a
17475 // PPC srl. If the original srl had multiple uses it would just remain in the
17476 // code. This is at most a performance consideration.
17477 if (FalseVal.getOpcode() != ISD::SRL || !FalseVal.hasOneUse())
17478 return SDValue();
17479
17480 SDValue ShiftVal = FalseVal.getOperand(0);
17481 SDValue ShiftAmt = FalseVal.getOperand(1);
17482
17483 // Check if ShiftVal is MinSignedValue
17484 auto *ShiftConst = dyn_cast<ConstantSDNode>(ShiftVal);
17485 if (!ShiftConst || !ShiftConst->getAPIntValue().isMinSignedValue())
17486 return SDValue();
17487
17488 SDValue CtlzArg;
17489 // Check if ShiftAmt is (ctlz CmpLHS) or (truncate (ctlz ...))
17490 if (ShiftAmt.getOpcode() != ISD::CTLZ) {
17491 // Look through truncate if present (for i64 ctlz truncated to i32 shift
17492 // amount)
17493 if (ShiftAmt.getOpcode() != ISD::TRUNCATE)
17494 return SDValue();
17495
17496 // Verify the truncate target type is appropriate for shift amount (i32, not
17497 // i1 or other)
17498 if (ShiftAmt.getValueType() != MVT::i32)
17499 return SDValue();
17500
17501 SDValue CtlzNode = ShiftAmt.getOperand(0);
17502
17503 if (CtlzNode.getOpcode() != ISD::CTLZ)
17504 return SDValue();
17505
17506 CtlzArg = CtlzNode.getOperand(0);
17507 } else {
17508 CtlzArg = ShiftAmt.getOperand(0);
17509 }
17510
17511 // Check if ctlz operates on the same value as the comparison
17512 if (CtlzArg != CmpLHS)
17513 return SDValue();
17514
17515 // Using PPCISD::SRL to ensure well-defined behavior.
17516 // On PowerPC, PPCISD::SRL guarantees that shift by bitwidth returns 0,
17517 // which is exactly what we need for the bitfloor(0) case.
17518 SDLoc DL(N);
17519 SDValue PPCSrl =
17520 DAG.getNode(PPCISD::SRL, DL, FalseVal.getValueType(), ShiftVal, ShiftAmt);
17521 return PPCSrl;
17522}
17523
17524// Optimize zero-extension of setcc when the compared value is known to be 0
17525// or 1.
17526//
17527// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
17528// -> zext(xor(Value, 1)) for seteq
17529// -> zext(Value) for setne
17530//
17531// This optimization avoids the i32 -> i1 -> i32/i64 conversion sequence
17532// by keeping the value in its original i32 type throughout.
17533//
17534// Example:
17535// Before: zext(setcc(test_data_class(...), 0, seteq))
17536// // test_data_class returns 0 or 1 in i32
17537// // setcc converts i32 -> i1
17538// // zext converts i1 -> i64
17539// After: zext(xor(test_data_class(...), 1))
17540// // Stays in i32, then extends to i64
17541//
17542// This is beneficial because:
17543// 1. Eliminates the setcc instruction
17544// 2. Avoids i32 -> i1 truncation
17545// 3. Keeps computation in native integer width
17546
17547 static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG) {
17548 // Check if this is a zero_extend
17549 if (N->getOpcode() != ISD::ZERO_EXTEND)
17550 return SDValue();
17551
17552 SDValue Src = N->getOperand(0);
17553
17554 // Check if the source is a setcc
17555 if (Src.getOpcode() != ISD::SETCC)
17556 return SDValue();
17557
17558 SDValue LHS = Src.getOperand(0);
17559 SDValue RHS = Src.getOperand(1);
17560 ISD::CondCode CC = cast<CondCodeSDNode>(Src.getOperand(2))->get();
17561
17562 if (!isNullConstant(LHS) && !isNullConstant(RHS))
17563 return SDValue();
17564
17565 SDValue NonNullConstant = isNullConstant(RHS) ? LHS : RHS;
17566
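// Currently, only the ppc_test_data_class intrinsic is recognized as
// producing a 0-or-1 value.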
17567 auto isZeroOrOne = [=](SDValue &V) {
17568 if (V.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17569 V.getConstantOperandVal(0) == Intrinsic::ppc_test_data_class)
17570 return true;
17571 return false;
17572 };
17573
17574 if (!isZeroOrOne(NonNullConstant))
17575 return SDValue();
17576
17577 // Check for pattern: zext(setcc (Value), 0, seteq)) or
17578 // zext(setcc (Value), 0, setne))
17579 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
17580 // Replace with: zext(xor(Value, 1)) for seteq
17581 // or: zext(Value) for setne
17582 // This keeps the value in i32 instead of converting to i1
17583 SDLoc DL(N);
17584 EVT VType = N->getValueType(0);
17585 SDValue NewNonNullConstant = DAG.getZExtOrTrunc(NonNullConstant, DL, VType);
17586
17587 if (CC == ISD::SETNE)
17588 return NewNonNullConstant;
17589
17590 SDValue One = DAG.getConstant(1, DL, VType);
17591 return DAG.getNode(ISD::XOR, DL, VType, NewNonNullConstant, One);
17592 }
17593
17594 return SDValue();
17595}
17596
17597 // Combine XOR patterns with SELECT_CC_I4/I8, for example:
17598 // 1. XOR(SELECT_CC_I4(cond, 1, 0, cc), 1) -> SELECT_CC_I4(cond, 0, 1, cc)
17599 // 2. XOR(ZEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond, 0,
17600 // 1, cc)
17601 // 3. XOR(ANYEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond,
17602 // 0, 1, cc)
17603 // 4. etc.
17604 static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG) {
17605 assert(N->getOpcode() == ISD::XOR && "Expected XOR node");
17606
17607 EVT XorVT = N->getValueType(0);
17608 if ((XorVT != MVT::i32 && XorVT != MVT::i64))
17609 return SDValue();
17610
17611 SDValue LHS = N->getOperand(0);
17612 SDValue RHS = N->getOperand(1);
17613
17614 // Check for XOR with constant 1
17615 ConstantSDNode *XorConst = dyn_cast<ConstantSDNode>(RHS);
17616 if (!XorConst || !XorConst->isOne()) {
17617 XorConst = dyn_cast<ConstantSDNode>(LHS);
17618 if (!XorConst || !XorConst->isOne())
17619 return SDValue();
17620 // Swap so LHS is the SELECT_CC_I4 (or extension) and RHS is the constant
17621 std::swap(LHS, RHS);
17622 }
17623
17624 // Check if LHS has only one use
17625 if (!LHS.hasOneUse())
17626 return SDValue();
17627
17628 // Handle extensions: ZEXT, ANYEXT
17629 SDValue SelectNode = LHS;
17630
17631 if (LHS.getOpcode() == ISD::ZERO_EXTEND ||
17632 LHS.getOpcode() == ISD::ANY_EXTEND) {
17633 SelectNode = LHS.getOperand(0);
17634
17635 // Check if the extension input has only one use
17636 if (!SelectNode.hasOneUse())
17637 return SDValue();
17638 }
17639
17640 // Check if SelectNode is a MachineSDNode with SELECT_CC_I4/I8 opcode
17641 if (!SelectNode.isMachineOpcode())
17642 return SDValue();
17643
17644 unsigned MachineOpc = SelectNode.getMachineOpcode();
17645
17646 // Handle both SELECT_CC_I4 and SELECT_CC_I8
17647 if (MachineOpc != PPC::SELECT_CC_I4 && MachineOpc != PPC::SELECT_CC_I8)
17648 return SDValue();
17649
17650 // SELECT_CC_I4 operands: (cond, true_val, false_val, bropc)
17651 if (SelectNode.getNumOperands() != 4)
17652 return SDValue();
17653
17654 ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(1));
17655 ConstantSDNode *ConstOp2 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(2));
17656
17657 if (!ConstOp1 || !ConstOp2)
17658 return SDValue();
17659
17660 // Only optimize if operands are {0, 1} or {1, 0}
17661 if (!((ConstOp1->isOne() && ConstOp2->isZero()) ||
17662 (ConstOp1->isZero() && ConstOp2->isOne())))
17663 return SDValue();
17664
17665 // Pattern matched! Create new SELECT_CC with swapped 0/1 operands to
17666 // eliminate XOR. If original was SELECT_CC(cond, 1, 0, pred), create
17667 // SELECT_CC(cond, 0, 1, pred). If original was SELECT_CC(cond, 0, 1, pred),
17668 // create SELECT_CC(cond, 1, 0, pred).
17669 SDLoc DL(N);
17670 MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;
17671
17672 bool ConstOp1IsOne = ConstOp1->isOne();
17673 return SDValue(
17674 DAG.getMachineNode(MachineOpc, DL, XorVT,
17675 {SelectNode.getOperand(0),
17676 DAG.getConstant(ConstOp1IsOne ? 0 : 1, DL, XorVT),
17677 DAG.getConstant(ConstOp1IsOne ? 1 : 0, DL, XorVT),
17678 SelectNode.getOperand(3)}),
17679 0);
17680}
17681
17682 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
17683 DAGCombinerInfo &DCI) const {
17684 SelectionDAG &DAG = DCI.DAG;
17685 SDLoc dl(N);
17686 switch (N->getOpcode()) {
17687 default: break;
17688 case ISD::ADD:
17689 return combineADD(N, DCI);
17690 case ISD::AND: {
17691 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17692 // original input as that will prevent us from selecting optimal rotates.
17693 // This only matters if the input to the extend is i32 widened to i64.
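// For example, (and (zext (srl i32 %x, 5)), 255) is rewritten as
// (zext (and i32 (srl i32 %x, 5), 255)) so the shift and mask can fold
// into a single 32-bit rotate-and-mask instruction.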
17694 SDValue Op1 = N->getOperand(0);
17695 SDValue Op2 = N->getOperand(1);
17696 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17697 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17698 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
17699 Op1.getOperand(0).getValueType() != MVT::i32)
17700 break;
17701 SDValue NarrowOp = Op1.getOperand(0);
17702 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17703 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17704 break;
17705
17706 uint64_t Imm = Op2->getAsZExtVal();
17707 // Make sure that the constant is narrow enough to fit in the narrow type.
17708 if (!isUInt<32>(Imm))
17709 break;
17710 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
17711 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
17712 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
17713 }
17714 case ISD::XOR: {
17715 // Optimize XOR(ISEL(1,0,CR), 1) -> ISEL(0,1,CR)
17716 if (SDValue V = combineXorSelectCC(N, DAG))
17717 return V;
17718 break;
17719 }
17720 case ISD::SHL:
17721 return combineSHL(N, DCI);
17722 case ISD::SRA:
17723 return combineSRA(N, DCI);
17724 case ISD::SRL:
17725 return combineSRL(N, DCI);
17726 case ISD::MUL:
17727 return combineMUL(N, DCI);
17728 case ISD::FMA:
17729 case PPCISD::FNMSUB:
17730 return combineFMALike(N, DCI);
17731 case PPCISD::SHL:
17732 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
17733 return N->getOperand(0);
17734 break;
17735 case PPCISD::SRL:
17736 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
17737 return N->getOperand(0);
17738 break;
17739 case PPCISD::SRA:
17740 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
17741 if (C->isZero() || // 0 >>s V -> 0.
17742 C->isAllOnes()) // -1 >>s V -> -1.
17743 return N->getOperand(0);
17744 }
17745 break;
17746 case ISD::SIGN_EXTEND:
17747 if (SDValue SECC = combineSignExtendSetCC(N, DCI))
17748 return SECC;
17749 [[fallthrough]];
17750 case ISD::ZERO_EXTEND:
17751 if (SDValue RetV = combineZextSetccWithZero(N, DCI.DAG))
17752 return RetV;
17753 [[fallthrough]];
17754 case ISD::ANY_EXTEND:
17755 return DAGCombineExtBoolTrunc(N, DCI);
17756 case ISD::TRUNCATE:
17757 return combineTRUNCATE(N, DCI);
17758 case ISD::SETCC:
17759 if (SDValue CSCC = combineSetCC(N, DCI))
17760 return CSCC;
17761 [[fallthrough]];
17762 case ISD::SELECT_CC:
17763 if (SDValue V = combineSELECT_CCBitFloor(N, DAG))
17764 return V;
17765 return DAGCombineTruncBoolExt(N, DCI);
17766 case ISD::SINT_TO_FP:
17767 case ISD::UINT_TO_FP:
17768 return combineFPToIntToFP(N, DCI);
17769 case ISD::VECTOR_SHUFFLE:
17770 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
17771 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
17772 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
17773 }
17774 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
17775 case ISD::STORE: {
17776
17777 EVT Op1VT = N->getOperand(1).getValueType();
17778 unsigned Opcode = N->getOperand(1).getOpcode();
17779
17780 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17781 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17782 SDValue Val = combineStoreFPToInt(N, DCI);
17783 if (Val)
17784 return Val;
17785 }
17786
17787 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17788 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
17789 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
17790 if (Val)
17791 return Val;
17792 }
17793
17794 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
17795 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
17796 N->getOperand(1).getNode()->hasOneUse() &&
17797 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17798 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17799
17800 // STBRX can only handle simple types and it makes no sense to store less
17801 // than two bytes in byte-reversed order.
17802 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
17803 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17804 break;
17805
17806 SDValue BSwapOp = N->getOperand(1).getOperand(0);
17807 // Do an any-extend to 32-bits if this is a half-word input.
17808 if (BSwapOp.getValueType() == MVT::i16)
17809 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
17810
17811 // If the type of the BSWAP operand is wider than the stored memory width,
17812 // it needs to be shifted right before STBRX.
17813 if (Op1VT.bitsGT(mVT)) {
17814 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17815 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
17816 DAG.getConstant(Shift, dl, MVT::i32));
17817 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17818 if (Op1VT == MVT::i64)
17819 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
17820 }
17821
17822 SDValue Ops[] = {
17823 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
17824 };
17825 return
17826 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
17827 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
17828 cast<StoreSDNode>(N)->getMemOperand());
17829 }
17830
17831 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17832 // This increases the chance of CSE'ing the constant materialization.
17833 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17834 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
17835 // Need to sign-extend to 64 bits to handle negative values.
17836 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
17837 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
17838 MemVT.getSizeInBits());
17839 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
17840
17841 auto *ST = cast<StoreSDNode>(N);
17842 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
17843 ST->getBasePtr(), ST->getOffset(), MemVT,
17844 ST->getMemOperand(), ST->getAddressingMode(),
17845 /*IsTruncating=*/true);
17846 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17847 // new store which will change the constant by removing non-demanded bits.
17848 return ST->isUnindexed()
17849 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
17850 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
17851 }
17852
17853 // For little endian, VSX stores require generating xxswapd/lxvd2x.
17854 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17855 if (Op1VT.isSimple()) {
17856 MVT StoreVT = Op1VT.getSimpleVT();
17857 if (Subtarget.needsSwapsForVSXMemOps() &&
17858 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17859 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17860 return expandVSXStoreForLE(N, DCI);
17861 }
17862 break;
17863 }
17864 case ISD::LOAD: {
17865 LoadSDNode *LD = cast<LoadSDNode>(N);
17866 EVT VT = LD->getValueType(0);
17867
17868 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17869 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17870 if (VT.isSimple()) {
17871 MVT LoadVT = VT.getSimpleVT();
17872 if (Subtarget.needsSwapsForVSXMemOps() &&
17873 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17874 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17875 return expandVSXLoadForLE(N, DCI);
17876 }
17877
17878 // We sometimes end up with a 64-bit integer load, from which we extract
17879 // two single-precision floating-point numbers. This happens with
17880 // std::complex<float>, and other similar structures, because of the way we
17881 // canonicalize structure copies. However, if we lack direct moves,
17882 // then the final bitcasts from the extracted integer values to the
17883 // floating-point numbers turn into store/load pairs. Even with direct moves,
17884 // just loading the two floating-point numbers is likely better.
17885 auto ReplaceTwoFloatLoad = [&]() {
17886 if (VT != MVT::i64)
17887 return false;
17888
17889 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17890 LD->isVolatile())
17891 return false;
17892
17893 // We're looking for a sequence like this:
17894 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17895 // t16: i64 = srl t13, Constant:i32<32>
17896 // t17: i32 = truncate t16
17897 // t18: f32 = bitcast t17
17898 // t19: i32 = truncate t13
17899 // t20: f32 = bitcast t19
17900
17901 if (!LD->hasNUsesOfValue(2, 0))
17902 return false;
17903
17904 auto UI = LD->user_begin();
17905 while (UI.getUse().getResNo() != 0) ++UI;
17906 SDNode *Trunc = *UI++;
17907 while (UI.getUse().getResNo() != 0) ++UI;
17908 SDNode *RightShift = *UI;
17909 if (Trunc->getOpcode() != ISD::TRUNCATE)
17910 std::swap(Trunc, RightShift);
17911
17912 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17913 Trunc->getValueType(0) != MVT::i32 ||
17914 !Trunc->hasOneUse())
17915 return false;
17916 if (RightShift->getOpcode() != ISD::SRL ||
17917 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
17918 RightShift->getConstantOperandVal(1) != 32 ||
17919 !RightShift->hasOneUse())
17920 return false;
17921
17922 SDNode *Trunc2 = *RightShift->user_begin();
17923 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17924 Trunc2->getValueType(0) != MVT::i32 ||
17925 !Trunc2->hasOneUse())
17926 return false;
17927
17928 SDNode *Bitcast = *Trunc->user_begin();
17929 SDNode *Bitcast2 = *Trunc2->user_begin();
17930
17931 if (Bitcast->getOpcode() != ISD::BITCAST ||
17932 Bitcast->getValueType(0) != MVT::f32)
17933 return false;
17934 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17935 Bitcast2->getValueType(0) != MVT::f32)
17936 return false;
17937
17938 if (Subtarget.isLittleEndian())
17939 std::swap(Bitcast, Bitcast2);
17940
17941 // Bitcast has the second float (in memory-layout order) and Bitcast2
17942 // has the first one.
17943
17944 SDValue BasePtr = LD->getBasePtr();
17945 if (LD->isIndexed()) {
17946 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17947 "Non-pre-inc AM on PPC?");
17948 BasePtr =
17949 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17950 LD->getOffset());
17951 }
17952
17953 auto MMOFlags =
17954 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17955 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17956 LD->getPointerInfo(), LD->getAlign(),
17957 MMOFlags, LD->getAAInfo());
17958 SDValue AddPtr =
17959 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17960 BasePtr, DAG.getIntPtrConstant(4, dl));
17961 SDValue FloatLoad2 = DAG.getLoad(
17962 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17963 LD->getPointerInfo().getWithOffset(4),
17964 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17965
17966 if (LD->isIndexed()) {
17967 // Note that DAGCombine should re-form any pre-increment load(s) from
17968 // what is produced here if that makes sense.
17969 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17970 }
17971
17972 DCI.CombineTo(Bitcast2, FloatLoad);
17973 DCI.CombineTo(Bitcast, FloatLoad2);
17974
17975 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17976 SDValue(FloatLoad2.getNode(), 1));
17977 return true;
17978 };
17979
17980 if (ReplaceTwoFloatLoad())
17981 return SDValue(N, 0);
17982
17983 EVT MemVT = LD->getMemoryVT();
17984 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17985 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17986 if (LD->isUnindexed() && VT.isVector() &&
17987 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17988 // P8 and later hardware should just use LOAD.
17989 !Subtarget.hasP8Vector() &&
17990 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17991 VT == MVT::v4f32))) &&
17992 LD->getAlign() < ABIAlignment) {
17993 // This is a type-legal unaligned Altivec load.
17994 SDValue Chain = LD->getChain();
17995 SDValue Ptr = LD->getBasePtr();
17996 bool isLittleEndian = Subtarget.isLittleEndian();
17997
17998 // This implements the loading of unaligned vectors as described in
17999 // the venerable Apple Velocity Engine overview. Specifically:
18000 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
18001 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
18002 //
18003 // The general idea is to expand a sequence of one or more unaligned
18004 // loads into an alignment-based permutation-control instruction (lvsl
18005 // or lvsr), a series of regular vector loads (which always truncate
18006 // their input address to an aligned address), and a series of
18007 // permutations. The results of these permutations are the requested
18008 // loaded values. The trick is that the last "extra" load is not taken
18009 // from the address you might suspect (sizeof(vector) bytes after the
18010 // last requested load), but rather sizeof(vector) - 1 bytes after the
18011 // last requested vector. The point of this is to avoid a page fault if
18012 // the base address happened to be aligned. This works because if the
18013 // base address is aligned, then adding less than a full vector length
18014 // will cause the last vector in the sequence to be (re)loaded.
18015 // Otherwise, the next vector will be fetched as you might suspect was
18016 // necessary.
18017
18018 // We might be able to reuse the permutation generation from
18019 // a different base address offset from this one by an aligned amount.
18020 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
18021 // optimization later.
18022 Intrinsic::ID Intr, IntrLD, IntrPerm;
18023 MVT PermCntlTy, PermTy, LDTy;
18024 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
18025 : Intrinsic::ppc_altivec_lvsl;
18026 IntrLD = Intrinsic::ppc_altivec_lvx;
18027 IntrPerm = Intrinsic::ppc_altivec_vperm;
18028 PermCntlTy = MVT::v16i8;
18029 PermTy = MVT::v4i32;
18030 LDTy = MVT::v4i32;
18031
18032 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
18033
18034 // Create the new MMO for the new base load. It is like the original MMO,
18035 // but represents an area in memory almost twice the vector size centered
18036 // on the original address. If the address is unaligned, we might start
18037 // reading up to (sizeof(vector)-1) bytes below the address of the
18038 // original unaligned load.
18039 MachineFunction &MF = DAG.getMachineFunction();
18040 MachineMemOperand *BaseMMO =
18041 MF.getMachineMemOperand(LD->getMemOperand(),
18042 -(int64_t)MemVT.getStoreSize()+1,
18043 2*MemVT.getStoreSize()-1);
18044
18045 // Create the new base load.
18046 SDValue LDXIntID =
18047 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
18048 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
18049 SDValue BaseLoad =
18050 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
18051 DAG.getVTList(PermTy, MVT::Other),
18052 BaseLoadOps, LDTy, BaseMMO);
18053
18054 // Note that the value of IncOffset (which is provided to the next
18055 // load's pointer info offset value, and thus used to calculate the
18056 // alignment), and the value of IncValue (which is actually used to
18057 // increment the pointer value) are different! This is because we
18058 // require the next load to appear to be aligned, even though it
18059 // is actually offset from the base pointer by a lesser amount.
18060 int IncOffset = VT.getSizeInBits() / 8;
18061 int IncValue = IncOffset;
18062
18063 // Walk (both up and down) the chain looking for another load at the real
18064 // (aligned) offset (the alignment of the other load does not matter in
18065 // this case). If found, then do not use the offset reduction trick, as
18066 // that will prevent the loads from being later combined (as they would
18067 // otherwise be duplicates).
18068 if (!findConsecutiveLoad(LD, DAG))
18069 --IncValue;
18070
18071 SDValue Increment =
18072 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
18073 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
18074
18075 MachineMemOperand *ExtraMMO =
18076 MF.getMachineMemOperand(LD->getMemOperand(),
18077 1, 2*MemVT.getStoreSize()-1);
18078 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
18079 SDValue ExtraLoad =
18080 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
18081 DAG.getVTList(PermTy, MVT::Other),
18082 ExtraLoadOps, LDTy, ExtraMMO);
18083
18084 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18085 BaseLoad.getValue(1), ExtraLoad.getValue(1));
18086
18087 // Because vperm has a big-endian bias, we must reverse the order
18088 // of the input vectors and complement the permute control vector
18089 // when generating little endian code. We have already handled the
18090 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
18091 // and ExtraLoad here.
18092 SDValue Perm;
18093 if (isLittleEndian)
18094 Perm = BuildIntrinsicOp(IntrPerm,
18095 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
18096 else
18097 Perm = BuildIntrinsicOp(IntrPerm,
18098 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
18099
18100 if (VT != PermTy)
18101 Perm = Subtarget.hasAltivec()
18102 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
18103 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
18104 DAG.getTargetConstant(1, dl, MVT::i64));
18105 // second argument is 1 because this rounding
18106 // is always exact.
18107
18108 // The output of the permutation is our loaded result, the TokenFactor is
18109 // our new chain.
18110 DCI.CombineTo(N, Perm, TF);
18111 return SDValue(N, 0);
18112 }
18113 }
18114 break;
18115 case ISD::INTRINSIC_WO_CHAIN: {
18116 bool isLittleEndian = Subtarget.isLittleEndian();
18117 unsigned IID = N->getConstantOperandVal(0);
18118 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
18119 : Intrinsic::ppc_altivec_lvsl);
18120 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
18121 SDValue Add = N->getOperand(1);
18122
18123 int Bits = 4 /* 16 byte alignment */;
18124
18125 if (DAG.MaskedValueIsZero(Add->getOperand(1),
18126 APInt::getAllOnes(Bits /* alignment */)
18127 .zext(Add.getScalarValueSizeInBits()))) {
18128 SDNode *BasePtr = Add->getOperand(0).getNode();
18129 for (SDNode *U : BasePtr->users()) {
18130 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18131 U->getConstantOperandVal(0) == IID) {
18132 // We've found another LVSL/LVSR, and this address is an aligned
18133 // multiple of that one. The results will be the same, so use the
18134 // one we've just found instead.
18135
18136 return SDValue(U, 0);
18137 }
18138 }
18139 }
18140
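// Similarly, if this address differs from another lvsl/lvsr base address
// by a constant multiple of 16 bytes, the permute control (which depends
// only on the low 4 bits of the address) is the same, so reuse that node.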
18141 if (isa<ConstantSDNode>(Add->getOperand(1))) {
18142 SDNode *BasePtr = Add->getOperand(0).getNode();
18143 for (SDNode *U : BasePtr->users()) {
18144 if (U->getOpcode() == ISD::ADD &&
18145 isa<ConstantSDNode>(U->getOperand(1)) &&
18146 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
18147 (1ULL << Bits) ==
18148 0) {
18149 SDNode *OtherAdd = U;
18150 for (SDNode *V : OtherAdd->users()) {
18151 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18152 V->getConstantOperandVal(0) == IID) {
18153 return SDValue(V, 0);
18154 }
18155 }
18156 }
18157 }
18158 }
18159 }
18160
18161 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
18162 // Expose the vabsduw/h/b opportunity for downstream combines.
18163 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
18164 (IID == Intrinsic::ppc_altivec_vmaxsw ||
18165 IID == Intrinsic::ppc_altivec_vmaxsh ||
18166 IID == Intrinsic::ppc_altivec_vmaxsb)) {
18167 SDValue V1 = N->getOperand(1);
18168 SDValue V2 = N->getOperand(2);
18169 if ((V1.getSimpleValueType() == MVT::v4i32 ||
18170 V1.getSimpleValueType() == MVT::v8i16 ||
18171 V1.getSimpleValueType() == MVT::v16i8) &&
18172 V1.getSimpleValueType() == V2.getSimpleValueType()) {
18173 // (0-a, a)
18174 if (V1.getOpcode() == ISD::SUB &&
18175 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
18176 V1.getOperand(1) == V2) {
18177 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
18178 }
18179 // (a, 0-a)
18180 if (V2.getOpcode() == ISD::SUB &&
18181 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
18182 V2.getOperand(1) == V1) {
18183 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
18184 }
18185 // (x-y, y-x)
18186 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
18187 V1.getOperand(0) == V2.getOperand(1) &&
18188 V1.getOperand(1) == V2.getOperand(0)) {
18189 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
18190 }
18191 }
18192 }
18193 }
18194
18195 break;
18196 case ISD::INTRINSIC_W_CHAIN:
18197 switch (N->getConstantOperandVal(1)) {
18198 default:
18199 break;
18200 case Intrinsic::ppc_altivec_vsum4sbs:
18201 case Intrinsic::ppc_altivec_vsum4shs:
18202 case Intrinsic::ppc_altivec_vsum4ubs: {
18203 // These sum-across intrinsics only have a chain due to the side effect
18204 // that they may set the SAT bit. If we know the SAT bit will not be set
18205 // for some inputs, we can replace any uses of their chain with the
18206 // input chain.
18207 if (BuildVectorSDNode *BVN =
18208 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
18209 APInt APSplatBits, APSplatUndef;
18210 unsigned SplatBitSize;
18211 bool HasAnyUndefs;
18212 bool BVNIsConstantSplat = BVN->isConstantSplat(
18213 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
18214 !Subtarget.isLittleEndian());
18215 // If the constant splat vector is 0, the SAT bit will not be set.
18216 if (BVNIsConstantSplat && APSplatBits == 0)
18217 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
18218 }
18219 return SDValue();
18220 }
18221 case Intrinsic::ppc_vsx_lxvw4x:
18222 case Intrinsic::ppc_vsx_lxvd2x:
18223 // For little endian, VSX loads require generating lxvd2x/xxswapd.
18224 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
18225 if (Subtarget.needsSwapsForVSXMemOps())
18226 return expandVSXLoadForLE(N, DCI);
18227 break;
18228 }
18229 break;
18230 case ISD::INTRINSIC_VOID:
18231 // For little endian, VSX stores require generating xxswapd/stxvd2x.
18232 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
18233 if (Subtarget.needsSwapsForVSXMemOps()) {
18234 switch (N->getConstantOperandVal(1)) {
18235 default:
18236 break;
18237 case Intrinsic::ppc_vsx_stxvw4x:
18238 case Intrinsic::ppc_vsx_stxvd2x:
18239 return expandVSXStoreForLE(N, DCI);
18240 }
18241 }
18242 break;
18243 case ISD::BSWAP: {
18244 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
18245 // For subtargets without LDBRX, we can still do better than the default
18246 // expansion even for 64-bit BSWAP (LOAD).
18247 bool Is64BitBswapOn64BitTgt =
18248 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
18249 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
18250 N->getOperand(0).hasOneUse();
18251 if (IsSingleUseNormalLd &&
18252 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
18253 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
18254 SDValue Load = N->getOperand(0);
18255 LoadSDNode *LD = cast<LoadSDNode>(Load);
18256 // Create the byte-swapping load.
18257 SDValue Ops[] = {
18258 LD->getChain(), // Chain
18259 LD->getBasePtr(), // Ptr
18260 DAG.getValueType(N->getValueType(0)) // VT
18261 };
18262 SDValue BSLoad =
18263 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
18264 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
18265 MVT::i64 : MVT::i32, MVT::Other),
18266 Ops, LD->getMemoryVT(), LD->getMemOperand());
18267
18268 // If this is an i16 load, insert the truncate.
18269 SDValue ResVal = BSLoad;
18270 if (N->getValueType(0) == MVT::i16)
18271 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
18272
18273 // First, combine the bswap away. This makes the value produced by the
18274 // load dead.
18275 DCI.CombineTo(N, ResVal);
18276
18277 // Next, combine the load away: we give it a bogus result value but a real
18278 // chain result. The result value is dead because the bswap is dead.
18279 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
18280
18281 // Return N so it doesn't get rechecked!
18282 return SDValue(N, 0);
18283 }
18284 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
18285 // before legalization so that the BUILD_PAIR is handled correctly.
18286 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
18287 !IsSingleUseNormalLd)
18288 return SDValue();
18289 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
18290
18291 // Can't split volatile or atomic loads.
18292 if (!LD->isSimple())
18293 return SDValue();
18294 SDValue BasePtr = LD->getBasePtr();
18295 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
18296 LD->getPointerInfo(), LD->getAlign());
18297 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
18298 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
18299 DAG.getIntPtrConstant(4, dl));
18300 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
18301 LD->getMemOperand(), 4, 4);
18302 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
18303 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
18304 SDValue Res;
18305 if (Subtarget.isLittleEndian())
18306 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
18307 else
18308 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
18309 SDValue TF =
18310 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18311 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
18312 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
18313 return Res;
18314 }
18315 case PPCISD::VCMP:
18316 // If a VCMP_rec node already exists with exactly the same operands as this
18317 // node, use its result instead of this node (VCMP_rec computes both a CR6
18318 // and a normal output).
18319 //
18320 if (!N->getOperand(0).hasOneUse() &&
18321 !N->getOperand(1).hasOneUse() &&
18322 !N->getOperand(2).hasOneUse()) {
18323
18324 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
18325 SDNode *VCMPrecNode = nullptr;
18326
18327 SDNode *LHSN = N->getOperand(0).getNode();
18328 for (SDNode *User : LHSN->users())
18329 if (User->getOpcode() == PPCISD::VCMP_rec &&
18330 User->getOperand(1) == N->getOperand(1) &&
18331 User->getOperand(2) == N->getOperand(2) &&
18332 User->getOperand(0) == N->getOperand(0)) {
18333 VCMPrecNode = User;
18334 break;
18335 }
18336
18337 // If there is no VCMP_rec node, or if the flag value has a single use,
18338 // don't transform this.
18339 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
18340 break;
18341
18342 // Look at the (necessarily single) use of the flag value. If it has a
18343 // chain, this transformation is more complex. Note that multiple things
18344 // could use the value result, which we should ignore.
18345 SDNode *FlagUser = nullptr;
18346 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
18347 FlagUser == nullptr; ++UI) {
18348 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
18349 SDNode *User = UI->getUser();
18350 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
18351 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
18352 FlagUser = User;
18353 break;
18354 }
18355 }
18356 }
18357
18358 // If the user is an MFOCRF instruction, we know this is safe.
18359 // Otherwise we give up for right now.
18360 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
18361 return SDValue(VCMPrecNode, 0);
18362 }
18363 break;
18364 case ISD::BR_CC: {
18365 // If this is a branch on an altivec predicate comparison, lower this so
18366 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
18367 // lowering is done pre-legalize, because the legalizer lowers the predicate
18368 // compare down to code that is difficult to reassemble.
18369 // This code also handles branches that depend on the result of a store
18370 // conditional.
18371 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18372 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
18373
18374 int CompareOpc;
18375 bool isDot;
18376
18377 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
18378 break;
18379
18380 // Since we are doing this pre-legalize, the RHS can be a constant of
18381 // arbitrary bitwidth which may cause issues when trying to get the value
18382 // from the underlying APInt.
18383 auto RHSAPInt = RHS->getAsAPIntVal();
18384 if (!RHSAPInt.isIntN(64))
18385 break;
18386
18387 unsigned Val = RHSAPInt.getZExtValue();
18388 auto isImpossibleCompare = [&]() {
18389 // If this is a comparison against something other than 0/1, then we know
18390 // that the condition is never/always true.
18391 if (Val != 0 && Val != 1) {
18392 if (CC == ISD::SETEQ) // Cond never true, remove branch.
18393 return N->getOperand(0);
18394 // Always !=, turn it into an unconditional branch.
18395 return DAG.getNode(ISD::BR, dl, MVT::Other,
18396 N->getOperand(0), N->getOperand(4));
18397 }
18398 return SDValue();
18399 };
18400 // Combine branches fed by store conditional instructions (st[bhwd]cx).
18401 unsigned StoreWidth = 0;
18402 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
18403 isStoreConditional(LHS, StoreWidth)) {
18404 if (SDValue Impossible = isImpossibleCompare())
18405 return Impossible;
18406 PPC::Predicate CompOpc;
18407 // eq 0 => ne
18408 // ne 0 => eq
18409 // eq 1 => eq
18410 // ne 1 => ne
18411 if (Val == 0)
18412 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
18413 else
18414 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
18415
18416 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
18417 DAG.getConstant(StoreWidth, dl, MVT::i32)};
18418 auto *MemNode = cast<MemSDNode>(LHS);
18419 SDValue ConstSt = DAG.getMemIntrinsicNode(
18420 PPCISD::STORE_COND, dl,
18421 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
18422 MemNode->getMemoryVT(), MemNode->getMemOperand());
18423
18424 SDValue InChain;
18425 // Unchain the branch from the original store conditional.
18426 if (N->getOperand(0) == LHS.getValue(1))
18427 InChain = LHS.getOperand(0);
18428 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
18429 SmallVector<SDValue, 4> InChains;
18430 SDValue InTF = N->getOperand(0);
18431 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
18432 if (InTF.getOperand(i) != LHS.getValue(1))
18433 InChains.push_back(InTF.getOperand(i));
18434 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
18435 }
18436
18437 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
18438 DAG.getConstant(CompOpc, dl, MVT::i32),
18439 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
18440 ConstSt.getValue(2));
18441 }
18442
18443 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18444 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
18445 assert(isDot && "Can't compare against a vector result!");
18446
18447 if (SDValue Impossible = isImpossibleCompare())
18448 return Impossible;
18449
18450 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
18451 // Create the PPCISD altivec 'dot' comparison node.
18452 SDValue Ops[] = {
18453 LHS.getOperand(2), // LHS of compare
18454 LHS.getOperand(3), // RHS of compare
18455 DAG.getConstant(CompareOpc, dl, MVT::i32)
18456 };
18457 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
18458 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
18459
18460 // Unpack the result based on how the target uses it.
18461 PPC::Predicate CompOpc;
18462 switch (LHS.getConstantOperandVal(1)) {
18463 default: // Can't happen, don't crash on invalid number though.
18464 case 0: // Branch on the value of the EQ bit of CR6.
18465 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
18466 break;
18467 case 1: // Branch on the inverted value of the EQ bit of CR6.
18468 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
18469 break;
18470 case 2: // Branch on the value of the LT bit of CR6.
18471 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
18472 break;
18473 case 3: // Branch on the inverted value of the LT bit of CR6.
18474 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
18475 break;
18476 }
18477
18478 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
18479 DAG.getConstant(CompOpc, dl, MVT::i32),
18480 DAG.getRegister(PPC::CR6, MVT::i32),
18481 N->getOperand(4), CompNode.getValue(1));
18482 }
18483 break;
18484 }
18485 case ISD::BUILD_VECTOR:
18486 return DAGCombineBuildVector(N, DCI);
18487 case PPCISD::ADDC:
18488 return DAGCombineAddc(N, DCI);
18489
18490 case ISD::BITCAST:
18491 return DAGCombineBitcast(N, DCI);
18492 }
18493
18494 return SDValue();
18495}
18496
18497SDValue
18498 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
18499 SelectionDAG &DAG,
18500 SmallVectorImpl<SDNode *> &Created) const {
18501 // fold (sdiv X, pow2)
18502 EVT VT = N->getValueType(0);
18503 if (VT == MVT::i64 && !Subtarget.isPPC64())
18504 return SDValue();
18505 if ((VT != MVT::i32 && VT != MVT::i64) ||
18506 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
18507 return SDValue();
18508
18509 SDLoc DL(N);
18510 SDValue N0 = N->getOperand(0);
18511
18512 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
18513 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
18514 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
18515
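// PPCISD::SRA_ADDZE is the sra[wd]i + addze idiom: the algebraic right
// shift sets CA when a negative value shifts out one bits, and addze adds
// the carry back so the division rounds toward zero rather than toward
// negative infinity.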
18516 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
18517 Created.push_back(Op.getNode());
18518
18519 if (IsNegPow2) {
18520 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
18521 Created.push_back(Op.getNode());
18522 }
18523
18524 return Op;
18525}
18526
18527//===----------------------------------------------------------------------===//
18528// Inline Assembly Support
18529//===----------------------------------------------------------------------===//
18530
18531 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
18532 KnownBits &Known,
18533 const APInt &DemandedElts,
18534 const SelectionDAG &DAG,
18535 unsigned Depth) const {
18536 Known.resetAll();
18537 switch (Op.getOpcode()) {
18538 default: break;
18539 case PPCISD::LBRX: {
18540 // lhbrx is known to have the top bits cleared out.
18541 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
18542 Known.Zero = 0xFFFF0000;
18543 break;
18544 }
18545 case PPCISD::ADDE: {
18546 if (Op.getResNo() == 0) {
18547 // (0|1), _ = ADDE 0, 0, CARRY
18548 SDValue LHS = Op.getOperand(0);
18549 SDValue RHS = Op.getOperand(1);
18550 if (isNullConstant(LHS) && isNullConstant(RHS))
18551 Known.Zero = ~1ULL;
18552 }
18553 break;
18554 }
18555 case ISD::INTRINSIC_WO_CHAIN: {
18556 switch (Op.getConstantOperandVal(0)) {
18557 default: break;
18558 case Intrinsic::ppc_altivec_vcmpbfp_p:
18559 case Intrinsic::ppc_altivec_vcmpeqfp_p:
18560 case Intrinsic::ppc_altivec_vcmpequb_p:
18561 case Intrinsic::ppc_altivec_vcmpequh_p:
18562 case Intrinsic::ppc_altivec_vcmpequw_p:
18563 case Intrinsic::ppc_altivec_vcmpequd_p:
18564 case Intrinsic::ppc_altivec_vcmpequq_p:
18565 case Intrinsic::ppc_altivec_vcmpgefp_p:
18566 case Intrinsic::ppc_altivec_vcmpgtfp_p:
18567 case Intrinsic::ppc_altivec_vcmpgtsb_p:
18568 case Intrinsic::ppc_altivec_vcmpgtsh_p:
18569 case Intrinsic::ppc_altivec_vcmpgtsw_p:
18570 case Intrinsic::ppc_altivec_vcmpgtsd_p:
18571 case Intrinsic::ppc_altivec_vcmpgtsq_p:
18572 case Intrinsic::ppc_altivec_vcmpgtub_p:
18573 case Intrinsic::ppc_altivec_vcmpgtuh_p:
18574 case Intrinsic::ppc_altivec_vcmpgtuw_p:
18575 case Intrinsic::ppc_altivec_vcmpgtud_p:
18576 case Intrinsic::ppc_altivec_vcmpgtuq_p:
18577 Known.Zero = ~1U; // All bits but the low one are known to be zero.
18578 break;
18579 }
18580 break;
18581 }
18582 case ISD::INTRINSIC_W_CHAIN: {
18583 switch (Op.getConstantOperandVal(1)) {
18584 default:
18585 break;
18586 case Intrinsic::ppc_load2r:
18587 // Top bits are cleared for load2r (which is the same as lhbrx).
18588 Known.Zero = 0xFFFF0000;
18589 break;
18590 }
18591 break;
18592 }
18593 }
18594}
18595
18596 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
18597 switch (Subtarget.getCPUDirective()) {
18598 default: break;
18599 case PPC::DIR_970:
18600 case PPC::DIR_PWR4:
18601 case PPC::DIR_PWR5:
18602 case PPC::DIR_PWR5X:
18603 case PPC::DIR_PWR6:
18604 case PPC::DIR_PWR6X:
18605 case PPC::DIR_PWR7:
18606 case PPC::DIR_PWR8:
18607 case PPC::DIR_PWR9:
18608 case PPC::DIR_PWR10:
18609 case PPC::DIR_PWR11:
18610 case PPC::DIR_PWR_FUTURE: {
18611 if (!ML)
18612 break;
18613
18614 if (!DisableInnermostLoopAlign32) {
18615 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
18616 // so that we can decrease cache misses and branch-prediction misses.
18617 // Actual alignment of the loop will depend on the hotness check and other
18618 // logic in alignBlocks.
18619 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
18620 return Align(32);
18621 }
18622
18623 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
18624
18625 // For small loops (between 5 and 8 instructions), align to a 32-byte
18626 // boundary so that the entire loop fits in one instruction-cache line.
18627 uint64_t LoopSize = 0;
18628 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
18629 for (const MachineInstr &J : **I) {
18630 LoopSize += TII->getInstSizeInBytes(J);
18631 if (LoopSize > 32)
18632 break;
18633 }
18634
18635 if (LoopSize > 16 && LoopSize <= 32)
18636 return Align(32);
18637
18638 break;
18639 }
18640 }
18641
18642 return TargetLowering::getPrefLoopAlignment(ML);
18643 }
18644
18645/// getConstraintType - Given a constraint, return the type of
18646/// constraint it is for this target.
18647 PPCTargetLowering::ConstraintType
18648 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
18649 if (Constraint.size() == 1) {
18650 switch (Constraint[0]) {
18651 default: break;
18652 case 'b':
18653 case 'r':
18654 case 'f':
18655 case 'd':
18656 case 'v':
18657 case 'y':
18658 return C_RegisterClass;
18659 case 'Z':
18660 // FIXME: While Z does indicate a memory constraint, it specifically
18661 // indicates an r+r address (used in conjunction with the 'y' modifier
18662 // in the replacement string). Currently, we're forcing the base
18663 // register to be r0 in the asm printer (which is interpreted as zero)
18664 // and forming the complete address in the second register. This is
18665 // suboptimal.
18666 return C_Memory;
18667 }
18668 } else if (Constraint == "wc") { // individual CR bits.
18669 return C_RegisterClass;
18670 } else if (Constraint == "wa" || Constraint == "wd" ||
18671 Constraint == "wf" || Constraint == "ws" ||
18672 Constraint == "wi" || Constraint == "ww") {
18673 return C_RegisterClass; // VSX registers.
18674 }
18675 return TargetLowering::getConstraintType(Constraint);
18676}
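// Illustrative sketch (not from the source): GCC-style inline asm that
// exercises the constraint classes handled above; register choices are
// left to the allocator.
//   asm ("add %0,%1,%2" : "=r"(sum) : "r"(a), "b"(b));   // 'r'/'b': GPRs
//   asm ("lxvd2x %x0,%y1" : "=wa"(v) : "Z"(*src));       // 'Z': r+r memory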
18677
18678/// Examine constraint type and operand type and determine a weight value.
18679/// This object must already have been set up with the operand type
18680/// and the current alternative constraint selected.
18681 TargetLowering::ConstraintWeight
18682 PPCTargetLowering::getSingleConstraintMatchWeight(
18683 AsmOperandInfo &info, const char *constraint) const {
18684 ConstraintWeight weight = CW_Invalid;
18685 Value *CallOperandVal = info.CallOperandVal;
18686 // If we don't have a value, we can't do a match,
18687 // but allow it at the lowest weight.
18688 if (!CallOperandVal)
18689 return CW_Default;
18690 Type *type = CallOperandVal->getType();
18691
18692 // Look at the constraint type.
18693 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
18694 return CW_Register; // an individual CR bit.
18695 else if ((StringRef(constraint) == "wa" ||
18696 StringRef(constraint) == "wd" ||
18697 StringRef(constraint) == "wf") &&
18698 type->isVectorTy())
18699 return CW_Register;
18700 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
18701 return CW_Register; // holds 64-bit integer data.
18702 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
18703 return CW_Register;
18704 else if (StringRef(constraint) == "ww" && type->isFloatTy())
18705 return CW_Register;
18706
18707 switch (*constraint) {
18708 default:
18709 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
18710 break;
18711 case 'b':
18712 if (type->isIntegerTy())
18713 weight = CW_Register;
18714 break;
18715 case 'f':
18716 if (type->isFloatTy())
18717 weight = CW_Register;
18718 break;
18719 case 'd':
18720 if (type->isDoubleTy())
18721 weight = CW_Register;
18722 break;
18723 case 'v':
18724 if (type->isVectorTy())
18725 weight = CW_Register;
18726 break;
18727 case 'y':
18728 weight = CW_Register;
18729 break;
18730 case 'Z':
18731 weight = CW_Memory;
18732 break;
18733 }
18734 return weight;
18735}
18736
18737std::pair<unsigned, const TargetRegisterClass *>
18738 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
18739 StringRef Constraint,
18740 MVT VT) const {
18741 if (Constraint.size() == 1) {
18742 // GCC RS6000 Constraint Letters
18743 switch (Constraint[0]) {
18744 case 'b': // R1-R31
18745 if (VT == MVT::i64 && Subtarget.isPPC64())
18746 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
18747 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
18748 case 'r': // R0-R31
18749 if (VT == MVT::i64 && Subtarget.isPPC64())
18750 return std::make_pair(0U, &PPC::G8RCRegClass);
18751 return std::make_pair(0U, &PPC::GPRCRegClass);
18752 // 'd' and 'f' constraints are both defined to be "the floating point
18753 // registers", where one is for 32-bit and the other for 64-bit. We don't
18754 // really care overly much here so just give them all the same reg classes.
18755 case 'd':
18756 case 'f':
18757 if (Subtarget.hasSPE()) {
18758 if (VT == MVT::f32 || VT == MVT::i32)
18759 return std::make_pair(0U, &PPC::GPRCRegClass);
18760 if (VT == MVT::f64 || VT == MVT::i64)
18761 return std::make_pair(0U, &PPC::SPERCRegClass);
18762 } else {
18763 if (VT == MVT::f32 || VT == MVT::i32)
18764 return std::make_pair(0U, &PPC::F4RCRegClass);
18765 if (VT == MVT::f64 || VT == MVT::i64)
18766 return std::make_pair(0U, &PPC::F8RCRegClass);
18767 }
18768 break;
18769 case 'v':
18770 if (Subtarget.hasAltivec() && VT.isVector())
18771 return std::make_pair(0U, &PPC::VRRCRegClass);
18772 else if (Subtarget.hasVSX())
18773 // Scalars in Altivec registers only make sense with VSX.
18774 return std::make_pair(0U, &PPC::VFRCRegClass);
18775 break;
18776 case 'y': // crrc
18777 return std::make_pair(0U, &PPC::CRRCRegClass);
18778 }
18779 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
18780 // An individual CR bit.
18781 return std::make_pair(0U, &PPC::CRBITRCRegClass);
18782 } else if ((Constraint == "wa" || Constraint == "wd" ||
18783 Constraint == "wf" || Constraint == "wi") &&
18784 Subtarget.hasVSX()) {
18785 // A VSX register for either a scalar (FP) or vector. There is no
18786 // support for single precision scalars on subtargets prior to Power8.
18787 if (VT.isVector())
18788 return std::make_pair(0U, &PPC::VSRCRegClass);
18789 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18790 return std::make_pair(0U, &PPC::VSSRCRegClass);
18791 return std::make_pair(0U, &PPC::VSFRCRegClass);
18792 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
18793 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18794 return std::make_pair(0U, &PPC::VSSRCRegClass);
18795 else
18796 return std::make_pair(0U, &PPC::VSFRCRegClass);
18797 } else if (Constraint == "lr") {
18798 if (VT == MVT::i64)
18799 return std::make_pair(0U, &PPC::LR8RCRegClass);
18800 else
18801 return std::make_pair(0U, &PPC::LRRCRegClass);
18802 }
18803
18804 // Handle special cases of physical registers that are not properly handled
18805 // by the base class.
18806 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
18807 // If we name a VSX register, we can't defer to the base class because it
18808 // will not recognize the correct register (their names will be VSL{0-31}
18809 // and V{0-31} so they won't match). So we match them here.
18810 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
18811 int VSNum = atoi(Constraint.data() + 3);
18812 assert(VSNum >= 0 && VSNum <= 63 &&
18813 "Attempted to access a vsr out of range");
18814 if (VSNum < 32)
18815 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
18816 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
18817 }
18818
18819 // For float registers, we can't defer to the base class as it will match
18820 // the SPILLTOVSRRC class.
18821 if (Constraint.size() > 3 && Constraint[1] == 'f') {
18822 int RegNum = atoi(Constraint.data() + 2);
18823 if (RegNum > 31 || RegNum < 0)
18824 report_fatal_error("Invalid floating point register number");
18825 if (VT == MVT::f32 || VT == MVT::i32)
18826 return Subtarget.hasSPE()
18827 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
18828 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
18829 if (VT == MVT::f64 || VT == MVT::i64)
18830 return Subtarget.hasSPE()
18831 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
18832 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
18833 }
18834 }
18835
18836 std::pair<unsigned, const TargetRegisterClass *> R =
18837 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
18838
18839 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
18840 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
18841 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
18842 // register.
18843 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
18844 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
18845 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
18846 PPC::GPRCRegClass.contains(R.first))
18847 return std::make_pair(TRI->getMatchingSuperReg(R.first,
18848 PPC::sub_32, &PPC::G8RCRegClass),
18849 &PPC::G8RCRegClass);
18850
18851 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
18852 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
18853 R.first = PPC::CR0;
18854 R.second = &PPC::CRRCRegClass;
18855 }
18856 // FIXME: This warning should ideally be emitted in the front end.
18857 const auto &TM = getTargetMachine();
18858 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
18859 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
18860 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
18861 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
18862 errs() << "warning: vector registers 20 to 32 are reserved in the "
18863 "default AIX AltiVec ABI and cannot be used\n";
18864 }
18865
18866 return R;
18867}
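// Illustrative sketch (not from the source) of the special cases above:
// naming a VSX register directly bypasses the base class,
//   register vector double vd asm ("vs34");   // vs34 overlaps V2
// and "{cc}" is accepted as an alias for cr0, matching GCC:
//   asm volatile ("cmpd 0,%0,%1" :: "r"(x), "r"(y) : "cc");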
18868
18869/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18870/// vector. If it is invalid, don't add anything to Ops.
18871 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
18872 StringRef Constraint,
18873 std::vector<SDValue> &Ops,
18874 SelectionDAG &DAG) const {
18875 SDValue Result;
18876
18877 // Only support length 1 constraints.
18878 if (Constraint.size() > 1)
18879 return;
18880
18881 char Letter = Constraint[0];
18882 switch (Letter) {
18883 default: break;
18884 case 'I':
18885 case 'J':
18886 case 'K':
18887 case 'L':
18888 case 'M':
18889 case 'N':
18890 case 'O':
18891 case 'P': {
18892 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
18893 if (!CST) return; // Must be an immediate to match.
18894 SDLoc dl(Op);
18895 int64_t Value = CST->getSExtValue();
18896 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
18897 // numbers are printed as such.
18898 switch (Letter) {
18899 default: llvm_unreachable("Unknown constraint letter!");
18900 case 'I': // "I" is a signed 16-bit constant.
18901 if (isInt<16>(Value))
18902 Result = DAG.getTargetConstant(Value, dl, TCVT);
18903 break;
18904 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
18905 if (isShiftedUInt<16, 16>(Value))
18906 Result = DAG.getTargetConstant(Value, dl, TCVT);
18907 break;
18908 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
18909 if (isShiftedInt<16, 16>(Value))
18910 Result = DAG.getTargetConstant(Value, dl, TCVT);
18911 break;
18912 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
18913 if (isUInt<16>(Value))
18914 Result = DAG.getTargetConstant(Value, dl, TCVT);
18915 break;
18916 case 'M': // "M" is a constant that is greater than 31.
18917 if (Value > 31)
18918 Result = DAG.getTargetConstant(Value, dl, TCVT);
18919 break;
18920 case 'N': // "N" is a positive constant that is an exact power of two.
18921 if (Value > 0 && isPowerOf2_64(Value))
18922 Result = DAG.getTargetConstant(Value, dl, TCVT);
18923 break;
18924 case 'O': // "O" is the constant zero.
18925 if (Value == 0)
18926 Result = DAG.getTargetConstant(Value, dl, TCVT);
18927 break;
18928 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
18929 if (isInt<16>(-Value))
18930 Result = DAG.getTargetConstant(Value, dl, TCVT);
18931 break;
18932 }
18933 break;
18934 }
18935 }
18936
18937 if (Result.getNode()) {
18938 Ops.push_back(Result);
18939 return;
18940 }
18941
18942 // Handle standard constraint letters.
18943 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
18944 }
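// Illustrative sketch (not from the source): the 'I' case above accepts any
// immediate that fits addi's signed 16-bit field, e.g.
//   asm ("addi %0,%1,%2" : "=r"(out) : "b"(in), "I"(512));
// while a value such as 100000 fails to match and no operand is added.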
18945
18946 void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
18947 SmallVectorImpl<SDValue> &Ops,
18948 SelectionDAG &DAG) const {
18949 if (I.getNumOperands() <= 1)
18950 return;
18951 if (!isa<ConstantSDNode>(Ops[1].getNode()))
18952 return;
18953 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18954 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18955 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18956 return;
18957
18958 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18959 Ops.push_back(DAG.getMDNode(MDN));
18960}
18961
18962// isLegalAddressingMode - Return true if the addressing mode represented
18963// by AM is legal for this target, for a load/store of the specified type.
18964 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
18965 const AddrMode &AM, Type *Ty,
18966 unsigned AS,
18967 Instruction *I) const {
18968 // Vector type r+i form is supported since power9 as DQ form. We don't check
18969 // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
18970 // imm form is preferred and the offset can be adjusted to use imm form later
18971 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
18972 // max offset to check legal addressing mode, we should be a little aggressive
18973 // to contain other offsets for that LSRUse.
18974 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18975 return false;
18976
18977 // PPC allows a sign-extended 16-bit immediate field.
18978 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18979 return false;
18980
18981 // No global is ever allowed as a base.
18982 if (AM.BaseGV)
18983 return false;
18984
18985 // PPC only supports r+i and r+r addressing.
18986 switch (AM.Scale) {
18987 case 0: // "r+i" or just "i", depending on HasBaseReg.
18988 break;
18989 case 1:
18990 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18991 return false;
18992 // Otherwise we have r+r or r+i.
18993 break;
18994 case 2:
18995 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18996 return false;
18997 // Allow 2*r as r+r.
18998 break;
18999 default:
19000 // No other scales are supported.
19001 return false;
19002 }
19003
19004 return true;
19005}
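// Illustrative sketch (not from the source): the accepted modes correspond
// to the two basic PPC load/store forms, e.g. for an i32 load:
//   lwz  3, 16(4)   ; r+i, Scale == 0, signed 16-bit displacement
//   lwzx 3, 4, 5    ; r+r, Scale == 1, no displacement
// A mode like base + 2*index + offset (Scale == 2 with BaseOffs) is rejected.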
19006
19007SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
19008 SelectionDAG &DAG) const {
19009 MachineFunction &MF = DAG.getMachineFunction();
19010 MachineFrameInfo &MFI = MF.getFrameInfo();
19011 MFI.setReturnAddressIsTaken(true);
19012
19013 SDLoc dl(Op);
19014 unsigned Depth = Op.getConstantOperandVal(0);
19015
19016 // Make sure the function does not optimize away the store of the RA to
19017 // the stack.
19018 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
19019 FuncInfo->setLRStoreRequired();
19020 auto PtrVT = getPointerTy(MF.getDataLayout());
19021
19022 if (Depth > 0) {
19023 // The link register (return address) is saved in the caller's frame
19024 // not the callee's stack frame. So we must get the caller's frame
19025 // address and load the return address at the LR offset from there.
19026 SDValue FrameAddr =
19027 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
19028 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
19029 SDValue Offset =
19030 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
19031 Subtarget.getScalarIntVT());
19032 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
19033 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
19034 MachinePointerInfo());
19035 }
19036
19037 // Just load the return address off the stack.
19038 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
19039 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
19040 MachinePointerInfo());
19041 }
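// Illustrative sketch (not from the source): __builtin_return_address(1)
// follows the code path above: load the caller's back chain, then load LR
// from the ABI's return-save slot (16(r1) under ELFv2):
//   ld 3, 0(1)    ; caller's frame address (back chain)
//   ld 3, 16(3)   ; return address saved in the caller's frame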
19042
19043SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
19044 SelectionDAG &DAG) const {
19045 SDLoc dl(Op);
19046 unsigned Depth = Op.getConstantOperandVal(0);
19047
19048 MachineFunction &MF = DAG.getMachineFunction();
19049 MachineFrameInfo &MFI = MF.getFrameInfo();
19050 MFI.setFrameAddressIsTaken(true);
19051
19052 EVT PtrVT = getPointerTy(MF.getDataLayout());
19053 bool isPPC64 = PtrVT == MVT::i64;
19054
19055 // Naked functions never have a frame pointer, and so we use r1. For all
19056 // other functions, this decision must be delayed until during PEI.
19057 unsigned FrameReg;
19058 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
19059 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
19060 else
19061 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
19062
19063 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
19064 PtrVT);
19065 while (Depth--)
19066 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
19067 FrameAddr, MachinePointerInfo());
19068 return FrameAddr;
19069}
19070
19071#define GET_REGISTER_MATCHER
19072#include "PPCGenAsmMatcher.inc"
19073
19074 Register PPCTargetLowering::getRegisterByName(const char *RegName, LLT VT,
19075 const MachineFunction &MF) const {
19076 bool IsPPC64 = Subtarget.isPPC64();
19077
19078 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
19079 if (!Is64Bit && VT != LLT::scalar(32))
19080 report_fatal_error("Invalid register global variable type");
19081
19082 Register Reg = MatchRegisterName(RegName);
19083 if (!Reg)
19084 return Reg;
19085
19086 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
19087 // Need followup investigation as to why.
19088 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
19089 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
19090 StringRef(RegName) + "\"."));
19091
19092 // Convert GPR to GP8R register for 64bit.
19093 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
19094 Reg = Reg.id() - PPC::R0 + PPC::X0;
19095
19096 return Reg;
19097}
19098
19099 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
19100 // The 32-bit SVR4 ABI accesses everything as got-indirect.
19101 if (Subtarget.is32BitELFABI())
19102 return true;
19103
19104 // AIX accesses everything indirectly through the TOC, which is similar to
19105 // the GOT.
19106 if (Subtarget.isAIXABI())
19107 return true;
19108
19109 CodeModel::Model CModel = getTargetMachine().getCodeModel();
19110 // If it is small or large code model, module locals are accessed
19111 // indirectly by loading their address from .toc/.got.
19112 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
19113 return true;
19114
19115 // JumpTable and BlockAddress are accessed as got-indirect.
19116 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
19117 return true;
19118
19119 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
19120 return Subtarget.isGVIndirectSymbol(G->getGlobal());
19121
19122 return false;
19123}
19124
19125bool
19126 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
19127 // The PowerPC target isn't yet aware of offsets.
19128 return false;
19129}
19130
19131 void PPCTargetLowering::getTgtMemIntrinsic(SmallVectorImpl<IntrinsicInfo> &Infos,
19132 const CallInst &I,
19133 MachineFunction &MF, unsigned Intrinsic) const {
19134 IntrinsicInfo Info;
19135 switch (Intrinsic) {
19136 case Intrinsic::ppc_atomicrmw_xchg_i128:
19137 case Intrinsic::ppc_atomicrmw_add_i128:
19138 case Intrinsic::ppc_atomicrmw_sub_i128:
19139 case Intrinsic::ppc_atomicrmw_nand_i128:
19140 case Intrinsic::ppc_atomicrmw_and_i128:
19141 case Intrinsic::ppc_atomicrmw_or_i128:
19142 case Intrinsic::ppc_atomicrmw_xor_i128:
19143 case Intrinsic::ppc_cmpxchg_i128:
19144 Info.opc = ISD::INTRINSIC_W_CHAIN;
19145 Info.memVT = MVT::i128;
19146 Info.ptrVal = I.getArgOperand(0);
19147 Info.offset = 0;
19148 Info.align = Align(16);
19149 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
19150 MachineMemOperand::MOVolatile;
19151 Infos.push_back(Info);
19152 return;
19153 case Intrinsic::ppc_atomic_load_i128:
19154 Info.opc = ISD::INTRINSIC_W_CHAIN;
19155 Info.memVT = MVT::i128;
19156 Info.ptrVal = I.getArgOperand(0);
19157 Info.offset = 0;
19158 Info.align = Align(16);
19159 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
19160 Infos.push_back(Info);
19161 return;
19162 case Intrinsic::ppc_atomic_store_i128:
19163 Info.opc = ISD::INTRINSIC_VOID;
19164 Info.memVT = MVT::i128;
19165 Info.ptrVal = I.getArgOperand(2);
19166 Info.offset = 0;
19167 Info.align = Align(16);
19168 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
19169 Infos.push_back(Info);
19170 return;
19171 case Intrinsic::ppc_altivec_lvx:
19172 case Intrinsic::ppc_altivec_lvxl:
19173 case Intrinsic::ppc_altivec_lvebx:
19174 case Intrinsic::ppc_altivec_lvehx:
19175 case Intrinsic::ppc_altivec_lvewx:
19176 case Intrinsic::ppc_vsx_lxvd2x:
19177 case Intrinsic::ppc_vsx_lxvw4x:
19178 case Intrinsic::ppc_vsx_lxvd2x_be:
19179 case Intrinsic::ppc_vsx_lxvw4x_be:
19180 case Intrinsic::ppc_vsx_lxvl:
19181 case Intrinsic::ppc_vsx_lxvll: {
19182 EVT VT;
19183 switch (Intrinsic) {
19184 case Intrinsic::ppc_altivec_lvebx:
19185 VT = MVT::i8;
19186 break;
19187 case Intrinsic::ppc_altivec_lvehx:
19188 VT = MVT::i16;
19189 break;
19190 case Intrinsic::ppc_altivec_lvewx:
19191 VT = MVT::i32;
19192 break;
19193 case Intrinsic::ppc_vsx_lxvd2x:
19194 case Intrinsic::ppc_vsx_lxvd2x_be:
19195 VT = MVT::v2f64;
19196 break;
19197 default:
19198 VT = MVT::v4i32;
19199 break;
19200 }
19201
19202 Info.opc = ISD::INTRINSIC_W_CHAIN;
19203 Info.memVT = VT;
19204 Info.ptrVal = I.getArgOperand(0);
19205 Info.offset = -VT.getStoreSize()+1;
19206 Info.size = 2*VT.getStoreSize()-1;
19207 Info.align = Align(1);
19208 Info.flags = MachineMemOperand::MOLoad;
19209 Infos.push_back(Info);
19210 return;
19211 }
19212 case Intrinsic::ppc_altivec_stvx:
19213 case Intrinsic::ppc_altivec_stvxl:
19214 case Intrinsic::ppc_altivec_stvebx:
19215 case Intrinsic::ppc_altivec_stvehx:
19216 case Intrinsic::ppc_altivec_stvewx:
19217 case Intrinsic::ppc_vsx_stxvd2x:
19218 case Intrinsic::ppc_vsx_stxvw4x:
19219 case Intrinsic::ppc_vsx_stxvd2x_be:
19220 case Intrinsic::ppc_vsx_stxvw4x_be:
19221 case Intrinsic::ppc_vsx_stxvl:
19222 case Intrinsic::ppc_vsx_stxvll: {
19223 EVT VT;
19224 switch (Intrinsic) {
19225 case Intrinsic::ppc_altivec_stvebx:
19226 VT = MVT::i8;
19227 break;
19228 case Intrinsic::ppc_altivec_stvehx:
19229 VT = MVT::i16;
19230 break;
19231 case Intrinsic::ppc_altivec_stvewx:
19232 VT = MVT::i32;
19233 break;
19234 case Intrinsic::ppc_vsx_stxvd2x:
19235 case Intrinsic::ppc_vsx_stxvd2x_be:
19236 VT = MVT::v2f64;
19237 break;
19238 default:
19239 VT = MVT::v4i32;
19240 break;
19241 }
19242
19243 Info.opc = ISD::INTRINSIC_VOID;
19244 Info.memVT = VT;
19245 Info.ptrVal = I.getArgOperand(1);
19246 Info.offset = -VT.getStoreSize()+1;
19247 Info.size = 2*VT.getStoreSize()-1;
19248 Info.align = Align(1);
19249 Info.flags = MachineMemOperand::MOStore;
19250 Infos.push_back(Info);
19251 return;
19252 }
19253 case Intrinsic::ppc_stdcx:
19254 case Intrinsic::ppc_stwcx:
19255 case Intrinsic::ppc_sthcx:
19256 case Intrinsic::ppc_stbcx: {
19257 EVT VT;
19258 auto Alignment = Align(8);
19259 switch (Intrinsic) {
19260 case Intrinsic::ppc_stdcx:
19261 VT = MVT::i64;
19262 break;
19263 case Intrinsic::ppc_stwcx:
19264 VT = MVT::i32;
19265 Alignment = Align(4);
19266 break;
19267 case Intrinsic::ppc_sthcx:
19268 VT = MVT::i16;
19269 Alignment = Align(2);
19270 break;
19271 case Intrinsic::ppc_stbcx:
19272 VT = MVT::i8;
19273 Alignment = Align(1);
19274 break;
19275 }
19276 Info.opc = ISD::INTRINSIC_W_CHAIN;
19277 Info.memVT = VT;
19278 Info.ptrVal = I.getArgOperand(0);
19279 Info.offset = 0;
19280 Info.align = Alignment;
19281 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
19282 Infos.push_back(Info);
19283 return;
19284 }
19285 default:
19286 break;
19287 }
19288}
19289
19290/// It returns EVT::Other if the type should be determined using generic
19291/// target-independent logic.
19292 EVT PPCTargetLowering::getOptimalMemOpType(
19293 LLVMContext &Context, const MemOp &Op,
19294 const AttributeList &FuncAttributes) const {
19295 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
19296 // We should use Altivec/VSX loads and stores when available. For unaligned
19297 // addresses, unaligned VSX loads are only fast starting with the P8.
19298 if (Subtarget.hasAltivec() && Op.size() >= 16) {
19299 if (Op.isMemset() && Subtarget.hasVSX()) {
19300 uint64_t TailSize = Op.size() % 16;
19301 // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
19302 // element if the vector element type matches the tail store. For a tail
19303 // size of 3 or 4 the tail store is i32, so v4i32 cannot be used; pick v8i16.
19304 if (TailSize > 2 && TailSize <= 4) {
19305 return MVT::v8i16;
19306 }
19307 return MVT::v4i32;
19308 }
19309 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
19310 return MVT::v4i32;
19311 }
19312 }
19313
19314 if (Subtarget.isPPC64()) {
19315 return MVT::i64;
19316 }
19317
19318 return MVT::i32;
19319}
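// Illustrative sketch (not from the source): at -O2 with VSX, a 32-byte
// memset is typed as v4i32; a 19-byte memset (one 16-byte body plus a
// 3-byte tail) is typed as v8i16 per the tail-size rule above; and a
// 24-byte memcpy on PPC64 without Altivec falls back to i64 copies.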
19320
19321/// Returns true if it is beneficial to convert a load of a constant
19322/// to just the constant itself.
19323 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
19324 Type *Ty) const {
19325 assert(Ty->isIntegerTy());
19326
19327 unsigned BitSize = Ty->getPrimitiveSizeInBits();
19328 return !(BitSize == 0 || BitSize > 64);
19329}
19330
19331 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
19332 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19333 return false;
19334 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
19335 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
19336 return NumBits1 == 64 && NumBits2 == 32;
19337}
19338
19339 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
19340 if (!VT1.isInteger() || !VT2.isInteger())
19341 return false;
19342 unsigned NumBits1 = VT1.getSizeInBits();
19343 unsigned NumBits2 = VT2.getSizeInBits();
19344 return NumBits1 == 64 && NumBits2 == 32;
19345}
19346
19347 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19348 // Generally speaking, zexts are not free, but they are free when they can be
19349 // folded with other operations.
19350 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
19351 EVT MemVT = LD->getMemoryVT();
19352 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
19353 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
19354 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
19355 LD->getExtensionType() == ISD::ZEXTLOAD))
19356 return true;
19357 }
19358
19359 // FIXME: Add other cases...
19360 // - 32-bit shifts with a zext to i64
19361 // - zext after ctlz, bswap, etc.
19362 // - zext after and by a constant mask
19363
19364 return TargetLowering::isZExtFree(Val, VT2);
19365}
19366
19367bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
19368 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
19369 "invalid fpext types");
19370 // Extending to float128 is not free.
19371 if (DestVT == MVT::f128)
19372 return false;
19373 return true;
19374}
19375
19376 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19377 return isInt<16>(Imm) || isUInt<16>(Imm);
19378}
19379
19380 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19381 return isInt<16>(Imm) || isUInt<16>(Imm);
19382}
19383
19384 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
19385 MachineMemOperand::Flags,
19386 unsigned *Fast) const {
19387 if (DisablePPCUnaligned)
19388 return false;
19389
19390 // PowerPC supports unaligned memory access for simple non-vector types.
19391 // Although accessing unaligned addresses is not as efficient as accessing
19392 // aligned addresses, it is generally more efficient than manual expansion,
19393 // and generally only traps for software emulation when crossing page
19394 // boundaries.
19395
19396 if (!VT.isSimple())
19397 return false;
19398
19399 if (VT.isFloatingPoint() && !VT.isVector() &&
19400 !Subtarget.allowsUnalignedFPAccess())
19401 return false;
19402
19403 if (VT.getSimpleVT().isVector()) {
19404 if (Subtarget.hasVSX()) {
19405 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
19406 VT != MVT::v4f32 && VT != MVT::v4i32)
19407 return false;
19408 } else {
19409 return false;
19410 }
19411 }
19412
19413 if (VT == MVT::ppcf128)
19414 return false;
19415
19416 if (Fast)
19417 *Fast = 1;
19418
19419 return true;
19420}
19421
19422 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
19423 SDValue C) const {
19424 // Check integral scalar types.
19425 if (!VT.isScalarInteger())
19426 return false;
19427 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
19428 if (!ConstNode->getAPIntValue().isSignedIntN(64))
19429 return false;
19430 // This transformation will generate >= 2 operations. But the following
19431 // cases will generate <= 2 instructions during ISEL. So exclude them.
19432 // 1. If the constant multiplier fits 16 bits, it can be handled by one
19433 // HW instruction, ie. MULLI
19434 // 2. If the multiplier after shifted fits 16 bits, an extra shift
19435 // instruction is needed than case 1, ie. MULLI and RLDICR
19436 int64_t Imm = ConstNode->getSExtValue();
19437 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
19438 Imm >>= Shift;
19439 if (isInt<16>(Imm))
19440 return false;
19441 uint64_t UImm = static_cast<uint64_t>(Imm);
19442 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
19443 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
19444 return true;
19445 }
19446 return false;
19447}
19448
19449 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19450 EVT VT) const {
19451 return isFMAFasterThanFMulAndFAdd(
19452 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
19453 }
19454
19455 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
19456 Type *Ty) const {
19457 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
19458 return false;
19459 switch (Ty->getScalarType()->getTypeID()) {
19460 case Type::FloatTyID:
19461 case Type::DoubleTyID:
19462 return true;
19463 case Type::FP128TyID:
19464 return Subtarget.hasP9Vector();
19465 default:
19466 return false;
19467 }
19468}
19469
19470// FIXME: add more patterns which are not profitable to hoist.
19471 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
19472 if (!I->hasOneUse())
19473 return true;
19474
19475 Instruction *User = I->user_back();
19476 assert(User && "A single use instruction with no uses.");
19477
19478 switch (I->getOpcode()) {
19479 case Instruction::FMul: {
19480 // Don't break FMA, PowerPC prefers FMA.
19481 if (User->getOpcode() != Instruction::FSub &&
19482 User->getOpcode() != Instruction::FAdd)
19483 return true;
19484
19485 const TargetOptions &Options = getTargetMachine().Options;
19486 const Function *F = I->getFunction();
19487 const DataLayout &DL = F->getDataLayout();
19488 Type *Ty = User->getOperand(0)->getType();
19489 bool AllowContract = I->getFastMathFlags().allowContract() &&
19490 User->getFastMathFlags().allowContract();
19491
19492 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
19493 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
19494 (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
19495 }
19496 case Instruction::Load: {
19497 // Don't break the "store (load float*)" pattern; it will be combined
19498 // into "store (load int32)" by a later InstCombine pass. See function
19499 // combineLoadToOperationType. On PowerPC, loading a floating-point value
19500 // takes more cycles than loading a 32-bit integer.
19501 LoadInst *LI = cast<LoadInst>(I);
19502 // For the loads that combineLoadToOperationType does nothing, like
19503 // ordered load, it should be profitable to hoist them.
19504 // For swifterror load, it can only be used for pointer to pointer type, so
19505 // later type check should get rid of this case.
19506 if (!LI->isUnordered())
19507 return true;
19508
19509 if (User->getOpcode() != Instruction::Store)
19510 return true;
19511
19512 if (I->getType()->getTypeID() != Type::FloatTyID)
19513 return true;
19514
19515 return false;
19516 }
19517 default:
19518 return true;
19519 }
19520 return true;
19521}
19522
19523const MCPhysReg *
19524 PPCTargetLowering::getScratchRegisters(CallLoweringInfo &) const {
19525 // LR is a callee-save register, but we must treat it as clobbered by any call
19526 // site. Hence we include LR in the scratch registers, which are in turn added
19527 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
19528 // to CTR, which is used by any indirect call.
19529 static const MCPhysReg ScratchRegs[] = {
19530 PPC::X12, PPC::LR8, PPC::CTR8, 0
19531 };
19532
19533 return ScratchRegs;
19534}
19535
19536 Register PPCTargetLowering::getExceptionPointerRegister(
19537 const Constant *PersonalityFn) const {
19538 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
19539}
19540
19541 Register PPCTargetLowering::getExceptionSelectorRegister(
19542 const Constant *PersonalityFn) const {
19543 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
19544}
19545
19546bool
19547 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
19548 EVT VT, unsigned DefinedValues) const {
19549 if (VT == MVT::v2i64)
19550 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
19551
19552 if (Subtarget.hasVSX())
19553 return true;
19554
19555 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
19556 }
19557
19558 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
19559 if (DisableILPPref || Subtarget.enableMachineScheduler())
19560 return TargetLowering::getSchedulingPreference(N);
19561
19562 return Sched::ILP;
19563}
19564
19565// Create a fast isel object.
19566 FastISel *PPCTargetLowering::createFastISel(
19567 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
19568 const LibcallLoweringInfo *LibcallLowering) const {
19569 return PPC::createFastISel(FuncInfo, LibInfo, LibcallLowering);
19570}
19571
19572// 'Inverted' means the FMA opcode after negating one multiplicand.
19573// For example, (fma -a b c) = (fnmsub a b c)
19574static unsigned invertFMAOpcode(unsigned Opc) {
19575 switch (Opc) {
19576 default:
19577 llvm_unreachable("Invalid FMA opcode for PowerPC!");
19578 case ISD::FMA:
19579 return PPCISD::FNMSUB;
19580 case PPCISD::FNMSUB:
19581 return ISD::FMA;
19582 }
19583}
19584
19585 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
19586 bool LegalOps, bool OptForSize,
19587 NegatibleCost &Cost,
19588 unsigned Depth) const {
19589 if (Depth > SelectionDAG::MaxRecursionDepth)
19590 return SDValue();
19591
19592 unsigned Opc = Op.getOpcode();
19593 EVT VT = Op.getValueType();
19594 SDNodeFlags Flags = Op.getNode()->getFlags();
19595
19596 switch (Opc) {
19597 case PPCISD::FNMSUB:
19598 if (!Op.hasOneUse() || !isTypeLegal(VT))
19599 break;
19600
19601 SDValue N0 = Op.getOperand(0);
19602 SDValue N1 = Op.getOperand(1);
19603 SDValue N2 = Op.getOperand(2);
19604 SDLoc Loc(Op);
19605
19606 NegatibleCost N2Cost = NegatibleCost::Expensive;
19607 SDValue NegN2 =
19608 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
19609
19610 if (!NegN2)
19611 return SDValue();
19612
19613 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
19614 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
19615 // These transformations may change sign of zeroes. For example,
19616 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
19617 if (Flags.hasNoSignedZeros()) {
19618 // Try and choose the cheaper one to negate.
19619 NegatibleCost N0Cost = NegatibleCost::Expensive;
19620 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
19621 N0Cost, Depth + 1);
19622
19623 NegatibleCost N1Cost = NegatibleCost::Expensive;
19624 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
19625 N1Cost, Depth + 1);
19626
19627 if (NegN0 && N0Cost <= N1Cost) {
19628 Cost = std::min(N0Cost, N2Cost);
19629 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
19630 } else if (NegN1) {
19631 Cost = std::min(N1Cost, N2Cost);
19632 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
19633 }
19634 }
19635
19636 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
19637 if (isOperationLegal(ISD::FMA, VT)) {
19638 Cost = N2Cost;
19639 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
19640 }
19641
19642 break;
19643 }
19644
19645 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
19646 Cost, Depth);
19647}
19648
19649// Override to enable LOAD_STACK_GUARD lowering on Linux.
19650 bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
19651 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
19652 return true;
19653 return TargetLowering::useLoadStackGuardNode(M);
19654 }
19655
19656 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
19657 bool ForCodeSize) const {
19658 if (!VT.isSimple() || !Subtarget.hasVSX())
19659 return false;
19660
19661 switch(VT.getSimpleVT().SimpleTy) {
19662 default:
19663 // For FP types that are currently not supported by PPC backend, return
19664 // false. Examples: f16, f80.
19665 return false;
19666 case MVT::f32:
19667 case MVT::f64: {
19668 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
19669 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
19670 return true;
19671 }
19672 bool IsExact;
19673 APSInt IntResult(16, false);
19674 // The rounding mode doesn't really matter because we only care about floats
19675 // that can be converted to integers exactly.
19676 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
19677 // For exact values in the range [-16, 15] we can materialize the float.
19678 if (IsExact && IntResult <= 15 && IntResult >= -16)
19679 return true;
19680 return Imm.isZero();
19681 }
19682 case MVT::ppcf128:
19683 return Imm.isPosZero();
19684 }
19685}
19686
19687// For vector shift operation op, fold
19688// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
19689 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
19690 SelectionDAG &DAG) {
19691 SDValue N0 = N->getOperand(0);
19692 SDValue N1 = N->getOperand(1);
19693 EVT VT = N0.getValueType();
19694 unsigned OpSizeInBits = VT.getScalarSizeInBits();
19695 unsigned Opcode = N->getOpcode();
19696 unsigned TargetOpcode;
19697
19698 switch (Opcode) {
19699 default:
19700 llvm_unreachable("Unexpected shift operation");
19701 case ISD::SHL:
19702 TargetOpcode = PPCISD::SHL;
19703 break;
19704 case ISD::SRL:
19705 TargetOpcode = PPCISD::SRL;
19706 break;
19707 case ISD::SRA:
19708 TargetOpcode = PPCISD::SRA;
19709 break;
19710 }
19711
19712 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
19713 N1->getOpcode() == ISD::AND)
19714 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
19715 if (Mask->getZExtValue() == OpSizeInBits - 1)
19716 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
19717
19718 return SDValue();
19719}
19720
19721SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
19722 DAGCombinerInfo &DCI) const {
19723 EVT VT = N->getValueType(0);
19724 assert(VT.isVector() && "Vector type expected.");
19725
19726 unsigned Opc = N->getOpcode();
19727 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
19728 "Unexpected opcode.");
19729
19730 if (!isOperationLegal(Opc, VT))
19731 return SDValue();
19732
19733 EVT EltTy = VT.getScalarType();
19734 unsigned EltBits = EltTy.getSizeInBits();
19735 if (EltTy != MVT::i64 && EltTy != MVT::i32)
19736 return SDValue();
19737
19738 SDValue N1 = N->getOperand(1);
19739 uint64_t SplatBits = 0;
19740 bool AddSplatCase = false;
19741 unsigned OpcN1 = N1.getOpcode();
19742 if (OpcN1 == PPCISD::VADD_SPLAT &&
19744 AddSplatCase = true;
19745 SplatBits = N1.getConstantOperandVal(0);
19746 }
19747
19748 if (!AddSplatCase) {
19749 if (OpcN1 != ISD::BUILD_VECTOR)
19750 return SDValue();
19751
19752 unsigned SplatBitSize;
19753 bool HasAnyUndefs;
19754 APInt APSplatBits, APSplatUndef;
19755 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
19756 bool BVNIsConstantSplat =
19757 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
19758 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
19759 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
19760 return SDValue();
19761 SplatBits = APSplatBits.getZExtValue();
19762 }
19763
19764 SDLoc DL(N);
19765 SDValue N0 = N->getOperand(0);
19766 // PPC vector shifts by word/double look at only the low 5/6 bits of the
19767 // shift vector, which means the max value is 31/63. A shift vector of all
19768 // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
19769 // -16 to 15 range.
19770 if (SplatBits == (EltBits - 1)) {
19771 unsigned NewOpc;
19772 switch (Opc) {
19773 case ISD::SHL:
19774 NewOpc = PPCISD::SHL;
19775 break;
19776 case ISD::SRL:
19777 NewOpc = PPCISD::SRL;
19778 break;
19779 case ISD::SRA:
19780 NewOpc = PPCISD::SRA;
19781 break;
19782 }
19783 SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
19784 return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
19785 }
19786
19787 if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
19788 return SDValue();
19789
19790 // For 64-bit there is no splat immediate so we want to catch shift by 1 here
19791 // before the BUILD_VECTOR is replaced by a load.
19792 if (EltTy != MVT::i64 || SplatBits != 1)
19793 return SDValue();
19794
19795 return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
19796}
19797
19798SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
19799 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19800 return Value;
19801
19802 if (N->getValueType(0).isVector())
19803 return combineVectorShift(N, DCI);
19804
19805 SDValue N0 = N->getOperand(0);
19806 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19807 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
19808 N0.getOpcode() != ISD::SIGN_EXTEND ||
19809 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
19810 N->getValueType(0) != MVT::i64)
19811 return SDValue();
19812
19813 // We can't save an operation here if the value is already extended, and
19814 // the existing shift is easier to combine.
19815 SDValue ExtsSrc = N0.getOperand(0);
19816 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19817 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
19818 return SDValue();
19819
19820 SDLoc DL(N0);
19821 SDValue ShiftBy = SDValue(CN1, 0);
19822 // We want the shift amount to be i32 on the extswli, but the shift could
19823 // have an i64.
19824 if (ShiftBy.getValueType() == MVT::i64)
19825 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
19826
19827 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
19828 ShiftBy);
19829}
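// Illustrative sketch (not from the source): the combine above rewrites
//   (shl (sign_extend i32 %x to i64), 3)
// into PPCISD::EXTSWSLI, selecting the single ISA 3.0 instruction
//   extswsli 3, 3, 3   ; extend sign word and shift left immediate
// instead of a separate extsw followed by sldi.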
19830
19831SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19832 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19833 return Value;
19834
19835 if (N->getValueType(0).isVector())
19836 return combineVectorShift(N, DCI);
19837
19838 return SDValue();
19839}
19840
19841SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19842 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19843 return Value;
19844
19845 if (N->getValueType(0).isVector())
19846 return combineVectorShift(N, DCI);
19847
19848 return SDValue();
19849}
19850
19851// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
19852// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
19853// When C is zero, the equation (addi Z, -C) can be simplified to Z
19854// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
19855 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
19856 const PPCSubtarget &Subtarget) {
19857 if (!Subtarget.isPPC64())
19858 return SDValue();
19859
19860 SDValue LHS = N->getOperand(0);
19861 SDValue RHS = N->getOperand(1);
19862
19863 auto isZextOfCompareWithConstant = [](SDValue Op) {
19864 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
19865 Op.getValueType() != MVT::i64)
19866 return false;
19867
19868 SDValue Cmp = Op.getOperand(0);
19869 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
19870 Cmp.getOperand(0).getValueType() != MVT::i64)
19871 return false;
19872
19873 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
19874 int64_t NegConstant = 0 - Constant->getSExtValue();
19875 // Due to the limitations of the addi instruction,
19876 // -C is required to be [-32768, 32767].
19877 return isInt<16>(NegConstant);
19878 }
19879
19880 return false;
19881 };
19882
19883 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
19884 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
19885
19886 // If there is a pattern, canonicalize a zext operand to the RHS.
19887 if (LHSHasPattern && !RHSHasPattern)
19888 std::swap(LHS, RHS);
19889 else if (!LHSHasPattern && !RHSHasPattern)
19890 return SDValue();
19891
19892 SDLoc DL(N);
19893 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
19894 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
19895 SDValue Cmp = RHS.getOperand(0);
19896 SDValue Z = Cmp.getOperand(0);
19897 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
19898 int64_t NegConstant = 0 - Constant->getSExtValue();
19899
19900 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
19901 default: break;
19902 case ISD::SETNE: {
19903 // when C == 0
19904 // --> addze X, (addic Z, -1).carry
19905 // /
19906 // add X, (zext(setne Z, C))--
19907 // \ when -32768 <= -C <= 32767 && C != 0
19908 // --> addze X, (addic (addi Z, -C), -1).carry
19909 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19910 DAG.getConstant(NegConstant, DL, MVT::i64));
19911 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19912 SDValue Addc =
19913 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19914 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
19915 DAG.getConstant(0, DL, CarryType));
19916 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19917 DAG.getConstant(0, DL, MVT::i64),
19918 SDValue(Addc.getNode(), 1));
19919 }
19920 case ISD::SETEQ: {
19921 // when C == 0
19922 // --> addze X, (subfic Z, 0).carry
19923 // /
19924 // add X, (zext(sete Z, C))--
19925 // \ when -32768 <= -C <= 32767 && C != 0
19926 // --> addze X, (subfic (addi Z, -C), 0).carry
19927 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19928 DAG.getConstant(NegConstant, DL, MVT::i64));
19929 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19930 SDValue Subc =
19931 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19932 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
19933 DAG.getConstant(0, DL, CarryType));
19934 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
19935 DAG.getConstant(1UL, DL, CarryType));
19936 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19937 DAG.getConstant(0, DL, MVT::i64), Invert);
19938 }
19939 }
19940
19941 return SDValue();
19942}
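// Illustrative sketch (not from the source): for `x + (z != 0)` on PPC64,
// the SETNE path above emits a branch-free carry sequence:
//   addic 5, 4, -1   ; CA = 1 iff z != 0 (z - 1 carries out unless z == 0)
//   addze 3, 3       ; x += CA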
19943
19944// Transform
19945// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19946// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19947// In this case both C1 and C2 must be known constants.
19948// C1+C2 must fit into a 34 bit signed integer.
19949 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
19950 const PPCSubtarget &Subtarget) {
19951 if (!Subtarget.isUsingPCRelativeCalls())
19952 return SDValue();
19953
19954 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19955 // If we find that node try to cast the Global Address and the Constant.
19956 SDValue LHS = N->getOperand(0);
19957 SDValue RHS = N->getOperand(1);
19958
19959 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19960 std::swap(LHS, RHS);
19961
19962 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19963 return SDValue();
19964
19965 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19966 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
19967 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(RHS);
19968
19969 // Check that both casts succeeded.
19970 if (!GSDN || !ConstNode)
19971 return SDValue();
19972
19973 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19974 SDLoc DL(GSDN);
19975
19976 // The signed int offset needs to fit in 34 bits.
19977 if (!isInt<34>(NewOffset))
19978 return SDValue();
19979
19980 // The new global address is a copy of the old global address except
19981 // that it has the updated Offset.
19982 SDValue GA =
19983 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
19984 NewOffset, GSDN->getTargetFlags());
19985 SDValue MatPCRel =
19986 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
19987 return MatPCRel;
19988}
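// Illustrative sketch (not from the source): folding keeps the address in a
// single prefixed instruction, e.g.
//   (add (MAT_PCREL_ADDR ga+4), 8)  ==>  (MAT_PCREL_ADDR ga+12)
// which materializes as one `paddi 3, 0, ga+12@pcrel, 1` rather than a
// paddi followed by an addi.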
19989
19990// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
19991// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
19992// Mathematical identity: X + 1 = X - (-1)
19993// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
19994// Requirement: VSX feature for efficient xxleqv generation
19995 static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG,
19996 const PPCSubtarget &Subtarget) {
19997
19998 EVT VT = N->getValueType(0);
19999 if (!Subtarget.hasVSX())
20000 return SDValue();
20001
20002 // Handle v2i64, v4i32, v8i16 and v16i8 types
20003 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
20004 VT == MVT::v2i64))
20005 return SDValue();
20006
20007 SDValue LHS = N->getOperand(0);
20008 SDValue RHS = N->getOperand(1);
20009
20010 // Check if RHS is BUILD_VECTOR
20011 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
20012 return SDValue();
20013
20014 // Check if all the elements are 1
20015 unsigned NumOfEles = RHS.getNumOperands();
20016 for (unsigned i = 0; i < NumOfEles; ++i) {
20017 auto *CN = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
20018 if (!CN || CN->getSExtValue() != 1)
20019 return SDValue();
20020 }
20021 SDLoc DL(N);
20022
20023 SDValue MinusOne = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
20024 SmallVector<SDValue, 4> Ops(4, MinusOne);
20025 SDValue AllOnesVec = DAG.getBuildVector(MVT::v4i32, DL, Ops);
20026
20027 // Bitcast to the target vector type
20028 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, AllOnesVec);
20029
20030 return DAG.getNode(ISD::SUB, DL, VT, LHS, Bitcast);
20031}
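// Illustrative sketch (not from the source): for v2i64,
//   (add %x, splat(1))  ==>  (sub %x, splat(-1))
// where the all-ones vector is produced without touching memory:
//   xxleqv 34, 34, 34   ; vs34 (= v2) = all ones, since a == a
//   vsubudm 2, 3, 2     ; x - (-1) == x + 1
// avoiding the load that a splat-of-1 v2i64 constant would otherwise need.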
20032
20033SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
20034 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
20035 return Value;
20036
20037 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
20038 return Value;
20039
20040 if (auto Value = combineADDToSUB(N, DCI.DAG, Subtarget))
20041 return Value;
20042 return SDValue();
20043}
20044
20045// Detect TRUNCATE operations on bitcasts of float128 values.
20046 // What we are looking for here is the situation where we extract a subset
20047// of bits from a 128 bit float.
20048// This can be of two forms:
20049// 1) BITCAST of f128 feeding TRUNCATE
20050// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
20051// The reason this is required is because we do not have a legal i128 type
20052// and so we want to prevent having to store the f128 and then reload part
20053// of it.
20054SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
20055 DAGCombinerInfo &DCI) const {
20056 // If we are using CRBits then try that first.
20057 if (Subtarget.useCRBits()) {
20058 // Check if CRBits did anything and return that if it did.
20059 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
20060 return CRTruncValue;
20061 }
20062
20063 SDLoc dl(N);
20064 SDValue Op0 = N->getOperand(0);
20065
20066 // Looking for a truncate of i128 to i64.
20067 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
20068 return SDValue();
20069
20070 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
20071
20072 // SRL feeding TRUNCATE.
20073 if (Op0.getOpcode() == ISD::SRL) {
20074 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
20075 // The right shift has to be by 64 bits.
20076 if (!ConstNode || ConstNode->getZExtValue() != 64)
20077 return SDValue();
20078
20079 // Switch the element number to extract.
20080 EltToExtract = EltToExtract ? 0 : 1;
20081 // Update Op0 past the SRL.
20082 Op0 = Op0.getOperand(0);
20083 }
20084
20085 // BITCAST feeding a TRUNCATE possibly via SRL.
20086 if (Op0.getOpcode() == ISD::BITCAST &&
20087 Op0.getValueType() == MVT::i128 &&
20088 Op0.getOperand(0).getValueType() == MVT::f128) {
20089 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
20090 return DCI.DAG.getNode(
20091 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
20092 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
20093 }
20094 return SDValue();
20095}
20096
20097SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
20098 SelectionDAG &DAG = DCI.DAG;
20099
20100 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
20101 if (!ConstOpOrElement)
20102 return SDValue();
20103
20104 // An imul is usually smaller than the alternative sequence for legal type.
20105 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
20106 isOperationLegal(ISD::MUL, N->getValueType(0)))
20107 return SDValue();
20108
20109 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
20110 switch (this->Subtarget.getCPUDirective()) {
20111 default:
20112 // TODO: enhance the condition for subtarget before pwr8
20113 return false;
20114 case PPC::DIR_PWR8:
20115 // type mul add shl
20116 // scalar 4 1 1
20117 // vector 7 2 2
20118 return true;
20119 case PPC::DIR_PWR9:
20120 case PPC::DIR_PWR10:
20121 case PPC::DIR_PWR11:
20122 case PPC::DIR_PWR_FUTURE:
20123 // type mul add shl
20124 // scalar 5 2 2
20125 // vector 7 2 2
20126
20127 // The cycle counts of the related operations are shown in the table above.
20128 // Because mul costs 5 (scalar) or 7 (vector) cycles while add/sub/shl all
20129 // cost 2 for both scalar and vector types, a 2-instruction pattern
20130 // (add/sub + shl, 4 cycles) is always profitable; but a 3-instruction
20131 // pattern such as (mul x, -(2^N + 1)) => -(add (shl x, N), x)
20132 // (sub + add + shl, 6 cycles) only pays off for vector types.
20133 return IsAddOne && IsNeg ? VT.isVector() : true;
20134 }
20135 };
20136
20137 EVT VT = N->getValueType(0);
20138 SDLoc DL(N);
20139
20140 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
20141 bool IsNeg = MulAmt.isNegative();
20142 APInt MulAmtAbs = MulAmt.abs();
20143
20144 if ((MulAmtAbs - 1).isPowerOf2()) {
20145 // (mul x, 2^N + 1) => (add (shl x, N), x)
20146 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
20147
20148 if (!IsProfitable(IsNeg, true, VT))
20149 return SDValue();
20150
20151 SDValue Op0 = N->getOperand(0);
20152 SDValue Op1 =
20153 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
20154 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
20155 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
20156
20157 if (!IsNeg)
20158 return Res;
20159
20160 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
20161 } else if ((MulAmtAbs + 1).isPowerOf2()) {
20162 // (mul x, 2^N - 1) => (sub (shl x, N), x)
20163 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
20164
20165 if (!IsProfitable(IsNeg, false, VT))
20166 return SDValue();
20167
20168 SDValue Op0 = N->getOperand(0);
20169 SDValue Op1 =
20170 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
20171 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
20172
20173 if (!IsNeg)
20174 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
20175 else
20176 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
20177
20178 } else {
20179 return SDValue();
20180 }
20181}
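// Illustrative sketch (not from the source) of the two shapes above, for
// 64-bit %x on a subtarget where IsProfitable returns true:
//   x * 9   ==>  sldi 4, 3, 3 ; add 3, 4, 3    ; 2^3 + 1
//   x * 7   ==>  sldi 4, 3, 3 ; sub 3, 4, 3    ; 2^3 - 1
//   x * -7  ==>  sldi 4, 3, 3 ; sub 3, 3, 4    ; -(2^3 - 1)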
20182
20183// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
20184// in combiner since we need to check SD flags and other subtarget features.
20185SDValue PPCTargetLowering::combineFMALike(SDNode *N,
20186 DAGCombinerInfo &DCI) const {
20187 SDValue N0 = N->getOperand(0);
20188 SDValue N1 = N->getOperand(1);
20189 SDValue N2 = N->getOperand(2);
20190 SDNodeFlags Flags = N->getFlags();
20191 EVT VT = N->getValueType(0);
20192 SelectionDAG &DAG = DCI.DAG;
20193 unsigned Opc = N->getOpcode();
20194 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
20195 bool LegalOps = !DCI.isBeforeLegalizeOps();
20196 SDLoc Loc(N);
20197
20198 if (!isOperationLegal(ISD::FMA, VT))
20199 return SDValue();
20200
20201 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
20202 // since (fnmsub a b c)=-0 while c-ab=+0.
20203 if (!Flags.hasNoSignedZeros())
20204 return SDValue();
20205
20206 // (fma (fneg a) b c) => (fnmsub a b c)
20207 // (fnmsub (fneg a) b c) => (fma a b c)
20208 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
20209 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
20210
20211 // (fma a (fneg b) c) => (fnmsub a b c)
20212 // (fnmsub a (fneg b) c) => (fma a b c)
20213 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
20214 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
20215
20216 return SDValue();
20217}
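// Illustrative sketch (not from the source): with no-signed-zeros set,
// negating one multiplicand just flips the opcode instead of emitting an
// explicit fneg:
//   (fma (fneg a), b, c)  ==>  (fnmsub a, b, c)   ; both equal c - a*b
// so `-(a*b) + c` becomes a single fnmsub instruction.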
20218
20219bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20220 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
20221 if (!Subtarget.is64BitELFABI())
20222 return false;
20223
20224 // If not a tail call then no need to proceed.
20225 if (!CI->isTailCall())
20226 return false;
20227
20228 // If sibling calls have been disabled and tail-calls aren't guaranteed
20229 // there is no reason to duplicate.
20230 auto &TM = getTargetMachine();
20231 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
20232 return false;
20233
20234 // Can't tail call a function called indirectly, or if it has variadic args.
20235 const Function *Callee = CI->getCalledFunction();
20236 if (!Callee || Callee->isVarArg())
20237 return false;
20238
20239 // Make sure the callee and caller calling conventions are eligible for tco.
20240 const Function *Caller = CI->getParent()->getParent();
20241 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
20242 CI->getCallingConv()))
20243 return false;
20244
20245 // If the function is local then we have a good chance at tail-calling it
20246 return getTargetMachine().shouldAssumeDSOLocal(Callee);
20247}
20248
20249bool PPCTargetLowering::
20250isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
20251 const Value *Mask = AndI.getOperand(1);
20252 // If the mask is suitable for andi. or andis. we should sink the and.
20253 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
20254 // Can't handle constants wider than 64-bits.
20255 if (CI->getBitWidth() > 64)
20256 return false;
20257 int64_t ConstVal = CI->getZExtValue();
20258 return isUInt<16>(ConstVal) ||
20259 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
20260 }
20261
20262 // For non-constant masks, we can always use the record-form and.
20263 return true;
20264}
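// Illustration (editorial sketch): the constants accepted above map onto the
// record-form immediate ANDs, e.g.
//   and x, 0x0000FFFF  => andi.  (low 16 bits)
//   and x, 0x00FF0000  => andis. (high halfword, low 16 bits all zero)
// whereas a mask like 0x00FF00FF fails both tests and is not sunk.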
20265
20266/// getAddrModeForFlags - Based on the set of address flags, select the
20267/// optimal instruction format to match by.
20268PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
20269 // This is not a node we should be handling here.
20270 if (Flags == PPC::MOF_None)
20271 return PPC::AM_None;
20272 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
20273 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
20274 if ((Flags & FlagSet) == FlagSet)
20275 return PPC::AM_DForm;
20276 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
20277 if ((Flags & FlagSet) == FlagSet)
20278 return PPC::AM_DSForm;
20279 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
20280 if ((Flags & FlagSet) == FlagSet)
20281 return PPC::AM_DQForm;
20282 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
20283 if ((Flags & FlagSet) == FlagSet)
20284 return PPC::AM_PrefixDForm;
20285 // If no other forms are selected, return an X-Form as it is the most
20286 // general addressing mode.
20287 return PPC::AM_XForm;
20288}
20289
20290/// Set alignment flags based on whether or not the Frame Index is aligned.
20291/// Utilized when computing flags for address computation when selecting
20292/// load and store instructions.
20293static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
20294 SelectionDAG &DAG) {
20295 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
20296 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
20297 if (!FI)
20298 return;
20299 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
20300 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
20301 // If this is (add $FI, $S16Imm), the alignment flags are already set
20302 // based on the immediate. We just need to clear the alignment flags
20303 // if the FI alignment is weaker.
20304 if ((FrameIndexAlign % 4) != 0)
20305 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
20306 if ((FrameIndexAlign % 16) != 0)
20307 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
20308 // If the address is a plain FrameIndex, set alignment flags based on
20309 // FI alignment.
20310 if (!IsAdd) {
20311 if ((FrameIndexAlign % 4) == 0)
20312 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
20313 if ((FrameIndexAlign % 16) == 0)
20314 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
20315 }
20316}
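// Illustration (editorial sketch): a frame object with Align(8) keeps
// MOF_RPlusSImm16Mult4 but has MOF_RPlusSImm16Mult16 cleared by the code
// above, so a DS-Form (multiple-of-4) displacement remains possible while a
// DQ-Form (multiple-of-16) one does not.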
20317
20318/// Given a node, compute flags that are used for address computation when
20319/// selecting load and store instructions. The flags computed are stored in
20320/// FlagSet. This function takes into account whether the node is a constant,
20321/// an ADD or disjoint OR, or neither, and computes the address flags accordingly.
20322static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
20323 SelectionDAG &DAG) {
20324 // Set the alignment flags for the node depending on if the node is
20325 // 4-byte or 16-byte aligned.
20326 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
20327 if ((Imm & 0x3) == 0)
20328 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
20329 if ((Imm & 0xf) == 0)
20330 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
20331 };
20332
20333 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
20334 // All 32-bit constants can be computed as LIS + Disp.
20335 const APInt &ConstImm = CN->getAPIntValue();
20336 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
20337 FlagSet |= PPC::MOF_AddrIsSImm32;
20338 SetAlignFlagsForImm(ConstImm.getZExtValue());
20339 setAlignFlagsForFI(N, FlagSet, DAG);
20340 }
20341 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
20342 FlagSet |= PPC::MOF_RPlusSImm34;
20343 else // Let constant materialization handle large constants.
20344 FlagSet |= PPC::MOF_NotAddNorCst;
20345 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
20346 // This address can be represented as an addition of:
20347 // - Register + Imm16 (possibly a multiple of 4/16)
20348 // - Register + Imm34
20349 // - Register + PPCISD::Lo
20350 // - Register + Register
20351 // In any case, we won't have to match this as Base + Zero.
20352 SDValue RHS = N.getOperand(1);
20353 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
20354 const APInt &ConstImm = CN->getAPIntValue();
20355 if (ConstImm.isSignedIntN(16)) {
20356 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
20357 SetAlignFlagsForImm(ConstImm.getZExtValue());
20358 setAlignFlagsForFI(N, FlagSet, DAG);
20359 }
20360 if (ConstImm.isSignedIntN(34))
20361 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
20362 else
20363 FlagSet |= PPC::MOF_RPlusR; // Register.
20364 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
20365 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
20366 else
20367 FlagSet |= PPC::MOF_RPlusR;
20368 } else { // The address computation is not a constant or an addition.
20369 setAlignFlagsForFI(N, FlagSet, DAG);
20370 FlagSet |= PPC::MOF_NotAddNorCst;
20371 }
20372}
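// Illustration (editorial sketch): for N = (add %r, 48) the constant operand
// is a signed 16-bit immediate and a multiple of both 4 and 16, so the code
// above sets MOF_RPlusSImm16, MOF_RPlusSImm16Mult4, MOF_RPlusSImm16Mult16 and
// MOF_RPlusSImm34; for (add %r, 50) only MOF_RPlusSImm16 and MOF_RPlusSImm34
// would be set.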
20373
20374static bool isPCRelNode(SDValue N) {
20375 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
20376         isValidPCRelNode<ConstantPoolSDNode>(N) ||
20377         isValidPCRelNode<GlobalAddressSDNode>(N) ||
20378         isValidPCRelNode<JumpTableSDNode>(N) ||
20379         isValidPCRelNode<BlockAddressSDNode>(N));
20380}
20381
20382/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
20383/// the address flags of the load/store instruction that is to be matched.
20384unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
20385 SelectionDAG &DAG) const {
20386 unsigned FlagSet = PPC::MOF_None;
20387
20388 // Compute subtarget flags.
20389 if (!Subtarget.hasP9Vector())
20390 FlagSet |= PPC::MOF_SubtargetBeforeP9;
20391 else
20392 FlagSet |= PPC::MOF_SubtargetP9;
20393
20394 if (Subtarget.hasPrefixInstrs())
20395 FlagSet |= PPC::MOF_SubtargetP10;
20396
20397 if (Subtarget.hasSPE())
20398 FlagSet |= PPC::MOF_SubtargetSPE;
20399
20400 // Check if we have a PCRel node and return early.
20401 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
20402 return FlagSet;
20403
20404 // If the node is one of the paired load/store intrinsics, compute flags for
20405 // address computation and return early.
20406 unsigned ParentOp = Parent->getOpcode();
20407 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
20408 (ParentOp == ISD::INTRINSIC_VOID))) {
20409 unsigned ID = Parent->getConstantOperandVal(1);
20410 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
20411 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
20412 ? Parent->getOperand(2)
20413 : Parent->getOperand(3);
20414 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
20415 FlagSet |= PPC::MOF_Vector;
20416 return FlagSet;
20417 }
20418 }
20419
20420 // Mark this as something we don't want to handle here if it is an atomic
20421 // or pre-increment instruction.
20422 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
20423 if (LSB->isIndexed())
20424 return PPC::MOF_None;
20425
20426 // Compute in-memory type flags. This is based on whether the type is a
20427 // scalar integer, a float, or a vector.
20428 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
20429 assert(MN && "Parent should be a MemSDNode!");
20430 EVT MemVT = MN->getMemoryVT();
20431 unsigned Size = MemVT.getSizeInBits();
20432 if (MemVT.isScalarInteger()) {
20433 assert(Size <= 128 &&
20434 "Not expecting scalar integers larger than 16 bytes!");
20435 if (Size < 32)
20436 FlagSet |= PPC::MOF_SubWordInt;
20437 else if (Size == 32)
20438 FlagSet |= PPC::MOF_WordInt;
20439 else
20440 FlagSet |= PPC::MOF_DoubleWordInt;
20441 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
20442 if (Size == 128)
20443 FlagSet |= PPC::MOF_Vector;
20444 else if (Size == 256) {
20445 assert(Subtarget.pairedVectorMemops() &&
20446 "256-bit vectors are only available when paired vector memops is "
20447 "enabled!");
20448 FlagSet |= PPC::MOF_Vector;
20449 } else
20450 llvm_unreachable("Not expecting illegal vectors!");
20451 } else { // Floating point type: can be scalar, f128 or vector types.
20452 if (Size == 32 || Size == 64)
20453 FlagSet |= PPC::MOF_ScalarFloat;
20454 else if (MemVT == MVT::f128 || MemVT.isVector())
20455 FlagSet |= PPC::MOF_Vector;
20456 else
20457 llvm_unreachable("Not expecting illegal scalar floats!");
20458 }
20459
20460 // Compute flags for address computation.
20461 computeFlagsForAddressComputation(N, FlagSet, DAG);
20462
20463 // Compute type extension flags.
20464 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
20465 switch (LN->getExtensionType()) {
20466 case ISD::SEXTLOAD:
20467 FlagSet |= PPC::MOF_SExt;
20468 break;
20469 case ISD::EXTLOAD:
20470 case ISD::ZEXTLOAD:
20471 FlagSet |= PPC::MOF_ZExt;
20472 break;
20473 case ISD::NON_EXTLOAD:
20474 FlagSet |= PPC::MOF_NoExt;
20475 break;
20476 }
20477 } else
20478 FlagSet |= PPC::MOF_NoExt;
20479
20480 // For integers, no extension is the same as zero extension.
20481 // We set the extension mode to zero extension so we don't have
20482 // to add separate entries in AddrModesMap for loads and stores.
20483 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
20484 FlagSet |= PPC::MOF_ZExt;
20485 FlagSet &= ~PPC::MOF_NoExt;
20486 }
20487
20488 // If we don't have prefixed instructions, 34-bit constants should be
20489 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
20490 bool IsNonP1034BitConst =
20491     ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
20492      FlagSet) == PPC::MOF_RPlusSImm34;
20493 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
20494 IsNonP1034BitConst)
20495 FlagSet |= PPC::MOF_NotAddNorCst;
20496
20497 return FlagSet;
20498}
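// Illustration (editorial sketch): a plain (non-extending) i32 load from
// (add %r, 48) on a Power9 subtarget would accumulate roughly
// MOF_SubtargetP9 | MOF_WordInt | MOF_RPlusSImm16 (plus the Mult4/Mult16
// alignment flags), with MOF_NoExt normalized to MOF_ZExt by the code above;
// getAddrModeForFlags() then matches this to a D-Form.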
20499
20500/// SelectForceXFormMode - Given the specified address, force it to be
20501/// represented as an indexed [r+r] operation (an XForm instruction).
20502PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
20503                                                      SDValue &Base,
20504                                                      SelectionDAG &DAG) const {
20505
20506 PPC::AddrMode Mode = PPC::AM_XForm;
20507 int16_t ForceXFormImm = 0;
20508 if (provablyDisjointOr(DAG, N) &&
20509 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
20510 Disp = N.getOperand(0);
20511 Base = N.getOperand(1);
20512 return Mode;
20513 }
20514
20515 // If the address is the result of an add, we will utilize the fact that the
20516 // address calculation includes an implicit add. However, we can reduce
20517 // register pressure if we do not materialize a constant just for use as the
20518 // index register. We only get rid of the add if it is not an add of a
20519 // value and a 16-bit signed constant and both have a single use.
20520 if (N.getOpcode() == ISD::ADD &&
20521 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
20522 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
20523 Disp = N.getOperand(0);
20524 Base = N.getOperand(1);
20525 return Mode;
20526 }
20527
20528 // Otherwise, use R0 as the base register.
20529 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20530 N.getValueType());
20531 Base = N;
20532
20533 return Mode;
20534}
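// Illustration (editorial sketch): for N = (add %a, %b) the code above
// returns Disp = %a and Base = %b, letting the implicit add of the X-Form
// [r+r] encoding perform the addition; otherwise the zero register is used
// so the address is computed as [0 + N].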
20535
20536bool PPCTargetLowering::splitValueIntoRegisterParts(
20537    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20538 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20539 EVT ValVT = Val.getValueType();
20540 // If we are splitting a scalar integer into f64 parts (i.e. so they
20541 // can be placed into VFRC registers), we need to zero extend and
20542 // bitcast the values. This will ensure the value is placed into a
20543 // VSR using direct moves or stack operations as needed.
20544 if (PartVT == MVT::f64 &&
20545 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
20546 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
20547 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
20548 Parts[0] = Val;
20549 return true;
20550 }
20551 return false;
20552}
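// Illustration (editorial sketch): for an i32 value headed to an f64 part,
// the path above produces (f64 (bitcast (i64 (zext x)))), leaving the
// integer bits in a VSR where they can be consumed directly; every other
// split returns false and falls back to the generic splitting logic.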
20553
20554SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
20555 SelectionDAG &DAG) const {
20556 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20557 TargetLowering::CallLoweringInfo CLI(DAG);
20558 EVT RetVT = Op.getValueType();
20559 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
20560 SDValue Callee =
20561 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
20562 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
20563 TargetLowering::ArgListTy Args;
20564 for (const SDValue &N : Op->op_values()) {
20565 EVT ArgVT = N.getValueType();
20566 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
20567 TargetLowering::ArgListEntry Entry(N, ArgTy);
20568 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
20569 Entry.IsZExt = !Entry.IsSExt;
20570 Args.push_back(Entry);
20571 }
20572
20573 SDValue InChain = DAG.getEntryNode();
20574 SDValue TCChain = InChain;
20575 const Function &F = DAG.getMachineFunction().getFunction();
20576 bool isTailCall =
20577 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
20578 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
20579 if (isTailCall)
20580 InChain = TCChain;
20581 CLI.setDebugLoc(SDLoc(Op))
20582 .setChain(InChain)
20583 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
20584 .setTailCall(isTailCall)
20585 .setSExtResult(SignExtend)
20586 .setZExtResult(!SignExtend)
20587     .setIsPostTypeLegalization(true);
20588 return TLI.LowerCallTo(CLI).first;
20589}
20590
20591SDValue PPCTargetLowering::lowerLibCallBasedOnType(
20592 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
20593 SelectionDAG &DAG) const {
20594 if (Op.getValueType() == MVT::f32)
20595 return lowerToLibCall(LibCallFloatName, Op, DAG);
20596
20597 if (Op.getValueType() == MVT::f64)
20598 return lowerToLibCall(LibCallDoubleName, Op, DAG);
20599
20600 return SDValue();
20601}
20602
20603bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
20604 SDNodeFlags Flags = Op.getNode()->getFlags();
20605 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
20606 Flags.hasNoNaNs() && Flags.hasNoInfs();
20607}
20608
20609bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
20610 return Op.getNode()->getFlags().hasApproximateFuncs();
20611}
20612
20613bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
20614 return getTargetMachine().Options.PPCGenScalarMASSEntries;
20615}
20616
20617SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
20618 const char *LibCallFloatName,
20619 const char *LibCallDoubleNameFinite,
20620 const char *LibCallFloatNameFinite,
20621 SDValue Op,
20622 SelectionDAG &DAG) const {
20623 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
20624 return SDValue();
20625
20626 if (!isLowringToMASSFiniteSafe(Op))
20627 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
20628 DAG);
20629
20630 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
20631 LibCallDoubleNameFinite, Op, DAG);
20632}
20633
20634SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
20635 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
20636 "__xl_powf_finite", Op, DAG);
20637}
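// Illustration (editorial sketch): a call like
//   %r = call afn nnan ninf nsz float @llvm.pow.f32(float %x, float %y)
// passes the isLowringToMASSFiniteSafe() checks and lowers to
// __xl_powf_finite, while the same call with only 'afn' set lowers to the
// general __xl_powf entry instead.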
20638
20639SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
20640 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
20641 "__xl_sinf_finite", Op, DAG);
20642}
20643
20644SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
20645 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
20646 "__xl_cosf_finite", Op, DAG);
20647}
20648
20649SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
20650 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
20651 "__xl_logf_finite", Op, DAG);
20652}
20653
20654SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
20655 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
20656 "__xl_log10f_finite", Op, DAG);
20657}
20658
20659SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
20660 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
20661 "__xl_expf_finite", Op, DAG);
20662}
20663
20664// If we happen to match to an aligned D-Form, check if the Frame Index is
20665// adequately aligned. If it is not, reset the mode to match to X-Form.
20666static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
20667                                    PPC::AddrMode &Mode) {
20668 if (!isa<FrameIndexSDNode>(N))
20669   return;
20670 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
20671     (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
20672   Mode = PPC::AM_XForm;
20673}
20674
20675/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
20676/// compute the address flags of the node, get the optimal address mode based
20677/// on the flags, and set the Base and Disp based on the address mode.
20678PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
20679                                                       SDValue N, SDValue &Disp,
20680 SDValue &Base,
20681 SelectionDAG &DAG,
20682 MaybeAlign Align) const {
20683 SDLoc DL(Parent);
20684
20685 // Compute the address flags.
20686 unsigned Flags = computeMOFlags(Parent, N, DAG);
20687
20688 // Get the optimal address mode based on the Flags.
20689 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
20690
20691 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
20692 // Select an X-Form load if it is not.
20693 setXFormForUnalignedFI(N, Flags, Mode);
20694
20695 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
20696 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
20697 assert(Subtarget.isUsingPCRelativeCalls() &&
20698 "Must be using PC-Relative calls when a valid PC-Relative node is "
20699 "present!");
20700 Mode = PPC::AM_PCRel;
20701 }
20702
20703 // Set Base and Disp accordingly depending on the address mode.
20704 switch (Mode) {
20705 case PPC::AM_DForm:
20706 case PPC::AM_DSForm:
20707 case PPC::AM_DQForm: {
20708 // This is a register plus a 16-bit immediate. The base will be the
20709 // register and the displacement will be the immediate unless it
20710 // isn't sufficiently aligned.
20711 if (Flags & PPC::MOF_RPlusSImm16) {
20712 SDValue Op0 = N.getOperand(0);
20713 SDValue Op1 = N.getOperand(1);
20714 int16_t Imm = Op1->getAsZExtVal();
20715 if (!Align || isAligned(*Align, Imm)) {
20716 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
20717 Base = Op0;
20718 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
20719 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20720 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20721 }
20722 break;
20723 }
20724 }
20725 // This is a register plus the @lo relocation. The base is the register
20726 // and the displacement is the global address.
20727 else if (Flags & PPC::MOF_RPlusLo) {
20728 Disp = N.getOperand(1).getOperand(0); // The global address.
20729 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
20730        Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
20731        Disp.getOpcode() == ISD::TargetConstantPool ||
20732        Disp.getOpcode() == ISD::TargetJumpTable);
20733 Base = N.getOperand(0);
20734 break;
20735 }
20736 // This is a constant address of at most 32 bits. The base will be
20737 // zero or load-immediate-shifted and the displacement will be
20738 // the low 16 bits of the address.
20739 else if (Flags & PPC::MOF_AddrIsSImm32) {
20740 auto *CN = cast<ConstantSDNode>(N);
20741 EVT CNType = CN->getValueType(0);
20742 uint64_t CNImm = CN->getZExtValue();
20743 // If this address fits entirely in a 16-bit sext immediate field, codegen
20744 // this as "d, 0".
20745 int16_t Imm;
20746 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
20747 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
20748 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20749 CNType);
20750 break;
20751 }
20752 // Handle 32-bit sext immediate with LIS + Addr mode.
20753 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
20754 (!Align || isAligned(*Align, CNImm))) {
20755 int32_t Addr = (int32_t)CNImm;
20756 // Otherwise, break this down into LIS + Disp.
20757 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
20758 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
20759 MVT::i32);
20760 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
20761 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
20762 break;
20763 }
20764 }
20765 // Otherwise, the PPC::MOF_NotAddNorCst flag is set; the load/store is not foldable.
20766 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
20767 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
20768 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20769 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20770 } else
20771 Base = N;
20772 break;
20773 }
20774 case PPC::AM_PrefixDForm: {
20775 int64_t Imm34 = 0;
20776 unsigned Opcode = N.getOpcode();
20777 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
20778 (isIntS34Immediate(N.getOperand(1), Imm34))) {
20779 // N is an Add/OR node, and its second operand is a 34-bit signed immediate.
20780 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20781 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
20782 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20783 else
20784 Base = N.getOperand(0);
20785 } else if (isIntS34Immediate(N, Imm34)) {
20786 // The address is a 34-bit signed immediate.
20787 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20788 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
20789 }
20790 break;
20791 }
20792 case PPC::AM_PCRel: {
20793 // When selecting PC-Relative instructions, "Base" is not utilized as
20794 // we select the address as [PC+imm].
20795 Disp = N;
20796 break;
20797 }
20798 case PPC::AM_None:
20799 break;
20800 default: { // By default, X-Form is always available to be selected.
20801 // When a frame index is not aligned, we also match by XForm.
20802 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
20803 Base = FI ? N : N.getOperand(1);
20804 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20805 N.getValueType())
20806 : N.getOperand(0);
20807 break;
20808 }
20809 }
20810 return Mode;
20811}
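// Illustration (editorial sketch): a doubleword load from (add %r, 48) whose
// flags include MOF_RPlusSImm16Mult4 can be matched as a DS-Form, and the
// switch above then yields Base = %r, Disp = 48; if only the X-Form default
// applies, the two address components land in Disp and Base as registers
// instead.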
20812
20813CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
20814                                                 bool Return,
20815 bool IsVarArg) const {
20816 switch (CC) {
20817 case CallingConv::Cold:
20818 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20819 default:
20820 return CC_PPC64_ELF;
20821 }
20822}
20823
20824bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
20825 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20826}
20827
20828TargetLowering::AtomicExpansionKind
20829PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20830 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20831 if (shouldInlineQuadwordAtomics() && Size == 128)
20832   return AtomicExpansionKind::MaskedIntrinsic;
20833
20834 switch (AI->getOperation()) {
20835 case AtomicRMWInst::UIncWrap:
20836 case AtomicRMWInst::UDecWrap:
20837 case AtomicRMWInst::USubCond:
20838 case AtomicRMWInst::USubSat:
20839   return AtomicExpansionKind::CmpXChg;
20840 default:
20841   return TargetLowering::shouldExpandAtomicRMWInIR(AI);
20842 }
20843
20844 llvm_unreachable("unreachable atomicrmw operation");
20845}
20846
20847TargetLowering::AtomicExpansionKind
20848PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(
20849    AtomicCmpXchgInst *AI) const {
20850 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
20851 if (shouldInlineQuadwordAtomics() && Size == 128)
20852   return AtomicExpansionKind::MaskedIntrinsic;
20853 return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
20854}
20855
20856static Intrinsic::ID
20857getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
20858 switch (BinOp) {
20859 default:
20860 llvm_unreachable("Unexpected AtomicRMW BinOp");
20861 case AtomicRMWInst::Xchg:
20862   return Intrinsic::ppc_atomicrmw_xchg_i128;
20863 case AtomicRMWInst::Add:
20864 return Intrinsic::ppc_atomicrmw_add_i128;
20865 case AtomicRMWInst::Sub:
20866 return Intrinsic::ppc_atomicrmw_sub_i128;
20867 case AtomicRMWInst::And:
20868 return Intrinsic::ppc_atomicrmw_and_i128;
20869 case AtomicRMWInst::Or:
20870 return Intrinsic::ppc_atomicrmw_or_i128;
20871 case AtomicRMWInst::Xor:
20872 return Intrinsic::ppc_atomicrmw_xor_i128;
20873 case AtomicRMWInst::Nand:
20874   return Intrinsic::ppc_atomicrmw_nand_i128;
20875 }
20876}
20877
20878Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
20879    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20880 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20881 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20882 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20883 Type *ValTy = Incr->getType();
20884 assert(ValTy->getPrimitiveSizeInBits() == 128);
20885 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20886 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
20887 Value *IncrHi =
20888 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
20889 Value *LoHi = Builder.CreateIntrinsic(
20890     getIntrinsicForAtomicRMWBinOp128(AI->getOperation()), {},
20891     {AlignedAddr, IncrLo, IncrHi});
20892 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20893 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20894 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20895 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20896 return Builder.CreateOr(
20897 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20898}
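// Illustration (editorial sketch): for a quadword 'atomicrmw add ptr %p,
// i128 %v', the code above truncates %v into two i64 halves, calls the
// @llvm.ppc.atomicrmw.add.i128 intrinsic with (%p, lo, hi), and then
// reassembles the returned {i64, i64} pair into an i128 result via zext,
// shl and or.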
20899
20900Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20901    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20902 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20903 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20904 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20905 Type *ValTy = CmpVal->getType();
20906 assert(ValTy->getPrimitiveSizeInBits() == 128);
20907 Function *IntCmpXchg =
20908 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
20909 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20910 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
20911 Value *CmpHi =
20912 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
20913 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
20914 Value *NewHi =
20915 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
20916 emitLeadingFence(Builder, CI, Ord);
20917 Value *LoHi =
20918 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
20919 emitTrailingFence(Builder, CI, Ord);
20920 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20921 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20922 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20923 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20924 return Builder.CreateOr(
20925 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20926}
20927
20929 return Subtarget.useCRBits();
20930}
20931
20932/// Shuffle masks for vectors of bits are not legal as such vectors are
20933/// reserved for MMA/DM.
20934bool PPCTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const {
20935 if (VT.getScalarType() == MVT::i1)
20936 return false;
20937 return TargetLowering::isShuffleMaskLegal(Mask, VT);
20938}
20939
20940// Optimize the following patterns using vbpermq/vbpermd:
20941// i16 = bitcast(v16i1 truncate(v16i8))
20942// i8 = bitcast(v8i1 truncate(v8i16))
20943// i8 = bitcast(v8i1 truncate(v8i8))
20944SDValue PPCTargetLowering::DAGCombineBitcast(SDNode *N,
20945 DAGCombinerInfo &DCI) const {
20946 SDValue Op0 = N->getOperand(0);
20947 if (Op0.getOpcode() != ISD::TRUNCATE)
20948 return SDValue();
20949 SDValue Src = Op0.getOperand(0);
20950 EVT ResVT = N->getValueType(0);
20951 EVT TruncResVT = Op0.getValueType();
20952 EVT SrcVT = Src.getValueType();
20953 SDLoc dl(N);
20954 SelectionDAG &DAG = DCI.DAG;
20955 bool IsLittleEndian = Subtarget.isLittleEndian();
20956
20957 if (ResVT != MVT::i16 && ResVT != MVT::i8)
20958 return SDValue();
20959 SDValue VBPerm =
20960 GenerateVBPERM(DAG, dl, Src, SrcVT, TruncResVT, IsLittleEndian);
20961 if (!VBPerm)
20962 return SDValue();
20963 SDValue ForExtract = DAG.getBitcast(MVT::v4i32, VBPerm);
20964 SDValue Extracted =
20965 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, ForExtract,
20966 DAG.getIntPtrConstant(IsLittleEndian ? 2 : 1, dl));
20967 return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Extracted);
20968}
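// Illustration (editorial sketch): vbpermq deposits the gathered 16-bit mask
// in the most-significant doubleword of the result, which viewed as v4i32 is
// element 1 on big-endian and element 2 on little-endian; hence the extract
// index chosen above before truncating to the i16/i8 result type.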
20969
20970SDValue PPCTargetLowering::GenerateVBPERM(SelectionDAG &DAG, SDLoc dl,
20971 SDValue Src, EVT SrcVT, EVT ResVT,
20972 bool IsLE) const {
20973 bool IsV16i8 = (ResVT == MVT::v16i1 && SrcVT == MVT::v16i8);
20974 bool IsV8i16 = (ResVT == MVT::v8i1 && SrcVT == MVT::v8i16);
20975 bool IsV8i8 = (ResVT == MVT::v8i1 && SrcVT == MVT::v8i8);
20976
20977 if (!IsV16i8 && !IsV8i16 && !IsV8i8)
20978 return SDValue();
20979
20980 if (IsV8i8) {
20981 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i8,
20982 DAG.getUNDEF(MVT::v16i8), Src,
20983 DAG.getIntPtrConstant(0, dl));
20984 }
20985 SmallVector<int, 16> BitIndices(16, 128);
20986 unsigned NumElts = SrcVT.getVectorNumElements();
20987 unsigned EltSize = SrcVT.getScalarType().getSizeInBits();
20988 for (int Idx = 0, End = SrcVT.getVectorNumElements(); Idx < End; Idx++) {
20989 BitIndices[Idx] = EltSize * (NumElts - Idx) - 1;
20990 if (IsV8i8 && IsLE)
20991 BitIndices[Idx] += 64;
20992 }
20993 if (!IsLE)
20994 std::reverse(BitIndices.begin(), BitIndices.end());
20995 SmallVector<SDValue, 16> BVOps;
20996 for (auto Idx : BitIndices)
20997 BVOps.push_back(DAG.getConstant(Idx, dl, MVT::i8));
20998 SDValue VRB = DAG.getBuildVector(MVT::v16i8, dl, BVOps);
20999 return DAG.getNode(
21000 ISD::INTRINSIC_WO_CHAIN, dl, MVT::v16i8,
21001 DAG.getConstant(Intrinsic::ppc_altivec_vbpermq, dl, MVT::i32),
21002 DAG.getBitcast(MVT::v16i8, Src), VRB);
21003}
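// Illustration (editorial sketch): for a v16i8 source the loop above requests
// bit EltSize * (NumElts - Idx) - 1 per lane, i.e. indices 127, 119, ..., 7
// (the sign bit of each byte); entries left at the initial value 128 are out
// of range, for which vbpermq produces zero bits.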
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static bool isConstantOrUndef(const SDValue Op)
MachineInstr unsigned OpIdx
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
std::tuple< uint32_t, uint8_t > LXVKQPattern
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static MachineBasicBlock * emitAtomicCmpSwapSoftware(MachineInstr &MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const PPCSubtarget &Subtarget)
Emit software-emulated atomic compare-and-swap for I8/I16 without hardware partword atomic support.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool IsSelect(unsigned Opcode, bool CheckOnlyCC=false)
Check if the opcode is a SELECT or SELECT_CC variant.
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static void prepareOutOfLineGlueCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS)
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool IsSelectCC(unsigned Opcode)
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static cl::opt< unsigned > PPCMinimumBitTestCmps("ppc-min-bit-test-cmps", cl::init(3), cl::Hidden, cl::desc("Set minimum of largest number of comparisons to use bit test for " "switch on PPC."))
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, SelectionDAG &DAG)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static std::optional< LXVKQPattern > getPatternInfo(const APInt &FullVal)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG)
Optimize the bitfloor(X) pattern for PowerPC.
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static MachineBasicBlock * emitSelect(MachineInstr &MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const PPCSubtarget &Subtarget)
Emit SELECT instruction, using ISEL if available, otherwise use branch-based control flow.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static bool canConvertSETCCToXori(SDNode *N)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static void createAtomicLoopBlocks(MachineFunction *F, MachineBasicBlock *BB, MachineBasicBlock *&loop1MBB, MachineBasicBlock *&loop2MBB, MachineBasicBlock *&exitMBB, MachineInstr &MI, MachineFunction::iterator It)
Helper function to create basic blocks for atomic compare-and-swap.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
static MachineBasicBlock * emitAtomicCmpSwapHardware(MachineInstr &MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const PPCSubtarget &Subtarget)
Emit hardware-supported atomic compare-and-swap for I32/I64 and I8/I16 with partword atomic support.
SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N, const SDLoc &DL)
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static void signExtendOperandIfUnknown(MachineInstr &MI, MachineBasicBlock *BB, unsigned OpIdx, bool IsByte, const PPCInstrInfo *TII)
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5899
bool isDenormal() const
Definition APFloat.h:1539
APInt bitcastToAPInt() const
Definition APFloat.h:1430
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
APInt abs() const
Get the absolute value.
Definition APInt.h:1818
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1419
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:398
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1745
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:483
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:217
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:239
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:362
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:775
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
arg_iterator arg_begin()
Definition Function.h:868
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
const Argument * const_arg_iterator
Definition Function.h:74
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
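These Function queries drive per-function lowering policy. A sketch under the assumption that MF names the current MachineFunction; the attribute string mirrors the stack-probe convention but is shown only as an example:
  const llvm::Function &F = MF.getFunction();
  bool OptForSize = F.hasOptSize() || F.hasMinSize();
  uint64_t ProbeSize =
      F.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
  bool NoImpFloat = F.hasFnAttribute(llvm::Attribute::NoImplicitFloat);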
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:659
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:200
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:141
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
Tracks which library functions to use for a particular subtarget.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1080
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
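Custom inserters combine these MachineBasicBlock operations to split a block around the instruction being expanded. A condensed sketch, assuming MI (a MachineInstr &) and its parent block MBB:
  llvm::MachineFunction *F = MBB->getParent();
  llvm::MachineBasicBlock *ExitMBB =
      F->CreateMachineBasicBlock(MBB->getBasicBlock());
  F->insert(std::next(llvm::MachineFunction::iterator(MBB)), ExitMBB);
  // Move everything after MI into the new block, then fix the CFG and PHIs.
  ExitMBB->splice(ExitMBB->begin(), MBB,
                  std::next(llvm::MachineBasicBlock::iterator(MI)),
                  MBB->end());
  ExitMBB->transferSuccessorsAndUpdatePHIs(MBB);
  MBB->addSuccessor(ExitMBB);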
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
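MachineFrameInfo distinguishes fixed objects, which sit at known offsets from the incoming stack pointer, from ordinary stack objects laid out later. A sketch, with sizes and offsets purely illustrative:
  llvm::MachineFrameInfo &MFI = MF.getFrameInfo();
  // Incoming stack argument at a known SP offset: fixed and immutable.
  int ArgFI = MFI.CreateFixedObject(/*Size=*/8, /*SPOffset=*/48,
                                    /*IsImmutable=*/true);
  // Spill slot whose final offset is assigned during prolog/epilog insertion.
  int SpillFI = MFI.CreateStackObject(/*Size=*/16, llvm::Align(16),
                                      /*isSpillSlot=*/true);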
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
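Each of these operand-appending methods returns the builder itself, so machine instructions are assembled by chaining off BuildMI. A minimal sketch (registers and the insertion point are assumed to be in scope; the opcode is PPC's real add-immediate):
  llvm::BuildMI(*MBB, InsertPt, DL, TII->get(PPC::ADDI), DestReg)
      .addReg(SrcReg)   // source register operand
      .addImm(16);      // signed 16-bit immediate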
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
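In SSA-form MachineIR these MachineRegisterInfo calls pair naturally: create a fresh virtual register, and later fold only when its definition has a single real user. A sketch with an assumed PPC register class:
  llvm::MachineRegisterInfo &MRI = MF.getRegInfo();
  llvm::Register Tmp = MRI.createVirtualRegister(&PPC::GPRCRegClass);
  // ... emit the instruction defining Tmp ...
  if (MRI.hasOneNonDBGUse(Tmp))
    ; // safe to rewrite the single non-debug user in place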
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
MCRegister getGlueCodeDescriptorRegister() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType.
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
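Most of the PPCTargetLowering entries above are override hooks; the central one, LowerOperation, is a dispatch on opcode. A condensed sketch of its shape (the two cases shown are illustrative; the real switch covers many more operations):
  SDValue PPCTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
    switch (Op.getOpcode()) {
    case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
    case ISD::SETCC:         return LowerSETCC(Op, DAG);
    default: llvm_unreachable("unexpected custom-lowered operation");
    }
  }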
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
Get the SDNode which holds the desired result.
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
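These SDNode/SDValue accessors enable the standard guard pattern in DAG combines: check the opcode, the operand shapes, and the use count before rewriting. A small sketch over an assumed node N:
  if (N->getOpcode() == llvm::ISD::ADD &&
      N->getOperand(1).getOpcode() == llvm::ISD::Constant &&
      N->hasOneUse()) {
    uint64_t Imm = N->getConstantOperandVal(1); // safe: operand 1 is constant
    llvm::EVT VT = N->getValueType(0);
    // ... build the replacement node ...
  }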
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
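SelectionDAG's factory methods compose by value, with memory operations returning a {value, chain} pair whose chain must be threaded through later operations. A hedged sketch (dl, Chain, and Ptr assumed to be in scope):
  SDValue Ld = DAG.getLoad(MVT::i64, dl, Chain, Ptr,
                           llvm::MachinePointerInfo());
  SDValue Inc = DAG.getNode(ISD::ADD, dl, MVT::i64, Ld,
                            DAG.getConstant(1, dl, MVT::i64));
  // Ld.getValue(1) is the load's output chain; the store must consume it.
  SDValue St = DAG.getStore(Ld.getValue(1), dl, Inc, Ptr,
                            llvm::MachinePointerInfo(), llvm::Align(8));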
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void setMinimumBitTestCmps(unsigned Val)
Set the minimum of the largest number of comparisons to generate a BitTest.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
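The setters in this group are typically invoked once, from the target's constructor, to describe what is legal before computeRegisterProperties derives the rest. A condensed sketch in the spirit of PPCTargetLowering's constructor (the specific types and actions are illustrative):
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i1, Promote);
  setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
  computeRegisterProperties(Subtarget.getRegisterInfo());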
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
bool getFunctionSections() const
Return true if functions should be emitted into their own section, corresponding to -ffunction-sectio...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:184
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:328
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:275
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ TargetConstantPool
Definition ISDOpcodes.h:189
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SET_ROUNDING
Set rounding mode.
Definition ISDOpcodes.h:975
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ TargetExternalSymbol
Definition ISDOpcodes.h:190
@ BR
Control flow instructions. These all have token chains.
@ TargetJumpTable
Definition ISDOpcodes.h:188
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
The subtraction counterpart of the overflow-aware [SU]ADDO nodes.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:185
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:970
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ INLINEASM
INLINEASM - Represents an inline asm block.
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:162
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:186
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
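A minimal sketch of looking up an overloaded intrinsic declaration with this entry point; the module name and the choice of llvm.sqrt are illustrative only:
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IR/Type.h"
  using namespace llvm;
  LLVMContext Ctx;
  Module M("demo", Ctx);
  // Declares (or finds) double @llvm.sqrt.f64(double) in M.
  Function *SqrtF64 = Intrinsic::getOrInsertDeclaration(
      &M, Intrinsic::sqrt, {Type::getDoubleTy(Ctx)});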
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:155
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:148
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:196
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:199
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:174
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:205
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:156
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:123
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:152
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:202
@ MO_TPREL_HA
Definition PPC.h:181
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:115
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition PPC.h:190
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:142
@ MO_TPREL_LO
Definition PPC.h:180
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:177
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:168
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:193
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:137
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:162
@ MO_HA
Definition PPC.h:178
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:119
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
Define some predicates that are used for node matching.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering)
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Invariant opcodes: All instruction sets have these as their low opcodes.
@ XMC_PR
Program Code.
Definition XCOFF.h:106
@ XTY_ER
External reference.
Definition XCOFF.h:242
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
iterator end() const
Definition BasicBlock.h:89
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
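For instance (values arbitrary, helper name hypothetical):
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  using namespace llvm;
  static bool allMultiplesOfFour() {
    SmallVector<int, 4> Ops = {4, 8, 12};
    return all_of(Ops, [](int V) { return V % 4 == 0; }); // true here
  }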
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
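This is the primitive behind the signed-immediate range checks in this file (e.g. whether a constant fits a 16-bit D-form displacement); a small illustration:
  #include "llvm/Support/MathExtras.h"
  using namespace llvm;
  static_assert(isInt<16>(-32768), "fits a signed 16-bit immediate");
  static_assert(!isInt<16>(32768), "one past the signed 16-bit range");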
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
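A minimal sketch of the isa/cast/dyn_cast idiom on IR values; the helper is hypothetical:
  #include "llvm/IR/Instructions.h"
  using namespace llvm;
  static bool isVolatileLoad(const Value *V) {
    if (const auto *LI = dyn_cast<LoadInst>(V)) // null if V is not a LoadInst
      return LI->isVolatile();
    return false;
  }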
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:204
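For example:
  #include "llvm/ADT/bit.h"
  using namespace llvm;
  unsigned TZ = countr_zero(0x50u); // 4: bits 0-3 are clear, bit 4 is set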
unsigned M1(unsigned Val)
Definition VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
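Together with isAligned above, this is the usual rounding arithmetic; a small illustration:
  #include "llvm/Support/Alignment.h"
  using namespace llvm;
  Align A(8);
  uint64_t Padded = alignTo(13, A); // rounds 13 up to 16
  bool OK = isAligned(A, Padded);   // true: 16 is a multiple of 8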
constexpr RegState getDefRegState(bool B)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
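Paired with Hi_32 above, this is how a 64-bit constant splits into two 32-bit halves (e.g. when it must be materialized with 32-bit operations):
  #include "llvm/Support/MathExtras.h"
  using namespace llvm;
  static_assert(Hi_32(0x123456789ABCDEF0ULL) == 0x12345678u, "high half");
  static_assert(Lo_32(0x123456789ABCDEF0ULL) == 0x9ABCDEF0u, "low half");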
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign ...
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N-bit number shifted left by S.
Definition MathExtras.h:182
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
constexpr unsigned BitWidth
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
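Both sign-extension helpers behave the same way at their respective widths; for instance:
  #include "llvm/Support/MathExtras.h"
  using namespace llvm;
  static_assert(SignExtend32<16>(0xFFFF) == -1, "all-ones 16-bit value");
  static_assert(SignExtend64<16>(0x8000) == -32768, "sign bit set");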
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
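A sketch of the MB/ME (mask begin/end) convention, which uses PPC's big-endian bit numbering (bit 0 is the MSB) as in rlwinm-style masks; the concrete values assume the implementation in this file:
  unsigned MB, ME;
  if (isRunOfOnes(0x0000FF00, MB, ME)) {
    // The run of ones spans PPC bits 16..23 (little-endian bits 15..8),
    // so MB == 16 and ME == 23.
  }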
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:32
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:198
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
LLVM_ABI std::string getEVTString() const
This function returns the value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:150
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
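A minimal sketch of building and querying EVTs with the accessors above:
  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  using namespace llvm;
  LLVMContext Ctx;
  EVT I32 = EVT::getIntegerVT(Ctx, 32);      // i32
  EVT V4I32 = EVT::getVectorVT(Ctx, I32, 4); // v4i32
  // V4I32.isVector(), V4I32.getVectorNumElements() == 4,
  // V4I32.getSizeInBits() == 128, V4I32.getScalarType() == I32.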
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:58
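A small illustration: once every bit is known, the tracked value collapses to a constant. The demo function is hypothetical:
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;
  static void knownBitsDemo() {
    KnownBits Known(8);
    Known.Zero = APInt(8, 0xF0); // high nibble known to be 0
    Known.One = APInt(8, 0x0F);  // low nibble known to be 1
    // Known.isConstant() is now true and Known.getConstant() == 0x0F.
    Known.resetAll();            // back to "nothing known"
  }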
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
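These setters chain, so call lowering code typically configures a CallLoweringInfo fluently. A sketch under the assumption that DAG, dl, Chain, Callee, RetTy and Args are already in scope inside a lowering routine:
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(Chain)
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setSExtResult(/*Value=*/true);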
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to the makeLibCall function.